Skip to content

Commit d8e8f19

Browse files
committed
Re-factored code. Extended code for Extracting KeyPhrases
1 parent 3acb0c8 commit d8e8f19

File tree

13 files changed

+32
-62
lines changed

13 files changed

+32
-62
lines changed

PageRank/Graph/GraphNode.cs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
using System;
2-
using System.Collections.Generic;
1+
using System.Collections.Generic;
32

43
namespace PageRank.Graph
54
{

PageRank/Graph/UnDirectedGraph.cs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
using System;
2-
using System.Diagnostics;
32

43
namespace PageRank.Graph
54
{

TextRank/ExtractKeyPhrases.cs

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,19 +3,14 @@
33

44
namespace TextRank
55
{
6-
public class ExtractKeyPhrases
6+
public static class ExtractKeyPhrases
77
{
8-
9-
public Tuple<string, List<string>> Extract(string sentence, int wordLength = 100)
8+
public static Tuple<string, List<string>> KeyPhrases(this string sentence, int wordLength = 100)
109
{
1110
var keyWords = ExtractKeyword.Extract.GetKeyWordsList(sentence);
12-
1311
var summary = ExtractSummary.Extract.ExtractParagraphSummary(sentence, wordLength);
14-
1512
return new Tuple<string, List<string>>(summary, keyWords);
1613
}
17-
18-
1914
}
2015
}
2116

TextRank/ExtractKeyword.cs

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,6 @@
1-
using System;
1+
using System.Linq;
22
using System.Collections.Generic;
3-
using System.Linq;
4-
using System.Text;
5-
using System.Threading.Tasks;
6-
using PageRank.Graph;
3+
74
using PageRank.Rank;
85
using TextRank.Helpers;
96
using TextRank.POSTagger;
@@ -16,21 +13,23 @@ internal sealed class ExtractKeyword
1613

1714
public List<string> GetKeyWordsList(string sentence)
1815
{
16+
IList<string> joinedKeywords = null;
1917
var taggedList = WordPOSTagger.GetPosTaggedTokens(sentence);
2018
var directedGraph = GraphUtil.GraphInstance.BuildPOSGraph<string>(taggedList);
2119
var rank = new PageRank<string>();
2220
var rankedDictionary = rank.Rank(directedGraph);
2321

24-
var word_list = ExtractUtil.instance.GetNormalizedUniqueWordList(taggedList);
22+
var wordList = ExtractUtil.instance.GetNormalizedUniqueWordList(taggedList);
2523

26-
var keywords = rankedDictionary?.OrderByDescending(p => p.Value).Take(rankedDictionary.Count/3).Select(p => p.Key).ToList();
27-
IList<string> joinedKeywords = null;
28-
if (keywords != null)
29-
joinedKeywords= ExtractUtil.instance.JoinAdjacentWords(word_list, keywords);
30-
return joinedKeywords as List<string>;
24+
var keywords = rankedDictionary?.OrderByDescending(p => p.Value)
25+
.Take(rankedDictionary.Count/3)
26+
.Select(p => p.Key)
27+
.ToList();
3128

29+
if (keywords != null)
30+
joinedKeywords= ExtractUtil.instance.JoinAdjacentWords(wordList, keywords);
3231

33-
32+
return joinedKeywords as List<string>;
3433
}
3534
}
3635
}

TextRank/ExtractSummary.cs

Lines changed: 14 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
using System;
2-
using System.Collections.Generic;
3-
using System.Linq;
1+
using System.Linq;
42
using System.Text;
3+
using System.Collections.Generic;
4+
55
using PageRank.Rank;
66
using TextRank.Helpers;
77
using TextRank.POSTagger;
@@ -15,19 +15,16 @@ internal sealed class ExtractSummary
1515
public string ExtractParagraphSummary(string paragraph, int wordLength =100)
1616
{
1717
var taggedSentences = SentencePOSTagger.GetTaggedSentences(paragraph);
18-
1918
var directedGraph = GraphUtil.GraphInstance.BuildPOSGraph<string>(taggedSentences);
20-
21-
2219
var rankedDictionary = new PageRank<string>().Rank(directedGraph);
23-
2420
var rankedSentencesList = rankedDictionary?.ToList().OrderByDescending(p => p.Value).Select(x => x.Key).ToList();
25-
var top_sentences = new List<string>();
21+
var topSentences = new List<string>();
2622

2723
if (rankedSentencesList != null)
2824
{
2925
int wordCount = 0;
3026
int index = 0;
27+
3128
foreach (var sentences in rankedSentencesList)
3229
{
3330
wordCount = sentences.Split(null).Length + wordCount;
@@ -36,31 +33,29 @@ public string ExtractParagraphSummary(string paragraph, int wordLength =100)
3633
break;
3734
}
3835
index++;
39-
4036
}
41-
if (wordCount < wordLength) index--;
42-
43-
top_sentences = rankedSentencesList.Take(index).ToList();
4437

38+
if (wordCount < wordLength) index--;
39+
topSentences = rankedSentencesList.Take(index).ToList();
4540
}
4641

4742
StringBuilder summry = new StringBuilder("");
4843

49-
foreach (var sent in taggedSentences)
44+
foreach (var taggedSentence in taggedSentences)
5045
{
51-
foreach (var summary_sentence in top_sentences)
46+
foreach (var summarySentence in topSentences)
5247
{
53-
if (summary_sentence.Equals(sent))
48+
if (summarySentence.Equals(taggedSentence))
5449
{
55-
summry.Append(summary_sentence);
50+
summry.Append(summarySentence);
5651
summry.Append("<br/><br/>");
5752
}
5853
}
5954
}
6055

61-
var orig_length = paragraph.Length;
62-
var summary = summry.ToString(); //string.Join(" ", summary_word);
63-
var summarized_length = summry.Length;
56+
var origLength = paragraph.Length;
57+
var summary = summry.ToString();
58+
var summarizedLength = summry.Length;
6459

6560
return summary;
6661
}

TextRank/Helpers/ExtentionMethod.cs

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,4 @@
1-
using System;
2-
using System.Collections.Generic;
3-
using System.Linq;
4-
using System.Text;
5-
using System.Threading.Tasks;
1+
using System.Collections.Generic;
62

73
namespace TextRank.Helpers
84
{

TextRank/Helpers/ExtractUtil.cs

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,6 @@
11
using System;
22
using System.Collections.Generic;
3-
using System.ComponentModel;
43
using System.Linq;
5-
using System.Text;
6-
using System.Threading.Tasks;
74

85
namespace TextRank.Helpers
96
{

TextRank/Helpers/GraphUtil.cs

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,6 @@
11
using System;
22
using System.Collections.Generic;
33
using System.Linq;
4-
using System.Security.AccessControl;
5-
using System.Text;
6-
using System.Threading.Tasks;
74
using PageRank.Graph;
85

96
namespace TextRank.Helpers

TextRank/LevenhteinDistance.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ public int Calculate(string firstString, string secondString)
4646
_editDistanceArray[i - 1, j - 1] + cost);
4747
}
4848
}
49+
4950
// Step 7
5051
return _editDistanceArray[firstStringLength, secondStringLength];
5152
}

TextRank/POSTagger/SentencePOSTagger.cs

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,4 @@
11
using System;
2-
using System.Collections.Generic;
3-
using System.Linq;
4-
using System.Text;
5-
using System.Threading.Tasks;
62
using OpenNLP.Tools.SentenceDetect;
73

84
namespace TextRank.POSTagger

TextRank/Properties/AssemblyInfo.cs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
using System.Reflection;
2-
using System.Runtime.CompilerServices;
32
using System.Runtime.InteropServices;
43

54
// General Information about an assembly is controlled through the following

TextSummarize/Program.cs

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,7 @@ static void Main(string[] args)
1212
.WriteTo.File("log.txt", rollingInterval: RollingInterval.Day)
1313
.WriteTo.Console()
1414
.CreateLogger();
15-
16-
var extractKeyPhrases = new ExtractKeyPhrases();
17-
15+
1816
var sentence =
1917
"LINCOLNSHIRE, IL With next-generation video game systems such as the Xbox One and the Playstation 4 hitting stores later this month, the console wars got even hotter today as electronics manufacturer Zenith announced the release of its own console, the Gamespace Pro, which arrives in stores Nov. 19. “With its sleek silver-and-gray box, double-analog-stick controllers, ability to play CDs, and starting price of $374.99, the Gamespace Pro is our way of saying, ‘Move over, Sony and Microsoft, Zenith is now officially a player in the console game,’” said Zenith CEO Michael Ahn at a Gamespace Pro press event, showcasing the system’s launch titles MoonChaser: Radiation, Cris Collinsworth’s Pigskin 2013, and survival-horror thriller InZomnia. “With over nine launch titles, 3D graphics, and the ability to log on to the internet using our Z-Connect technology, Zenith is finally poised to make some big waves in the video game world.” According to Zenith representatives, over 650 units have already been preordered.";
2018
var sentence1 =
@@ -28,7 +26,7 @@ static void Main(string[] args)
2826
The deal, which valued the Bengaluru-based company at USD 20.8 billion, is believed to be part of Walmart's strategy to strengthen presence in the Indian market and also compete head-on with global rival, Amazon.
2927
Amazon is also a rival to Flipkart and the two are locked in an intense battle for leadership in the booming Indian e-commerce market that is forecast to touch USD 200 billion in the next few years.";
3028

31-
var phrases = extractKeyPhrases.Extract(sentence1);
29+
var phrases = sentence1.KeyPhrases();
3230

3331
Log.Information("Extracted Phrases : {Phrases}", phrases.Item1);
3432
Log.Information(sentence1);

TextSummarize/Properties/AssemblyInfo.cs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
using System.Reflection;
2-
using System.Runtime.CompilerServices;
32
using System.Runtime.InteropServices;
43

54
// General Information about an assembly is controlled through the following

0 commit comments

Comments
 (0)