Skip to content

Commit 4c28118

Browse files
committed
feat: data/histogram: add ParseFileCSV(), ParseTable(), TransformBinNames(), TransformBinNamesByPrefix()
1 parent 1f73d22 commit 4c28118

File tree

8 files changed

+768
-30
lines changed

8 files changed

+768
-30
lines changed

data/table/documents.go renamed to data/histogram/documents.go

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,4 @@
1-
package table
2-
3-
import (
4-
"github.com/grokify/gocharts/data/histogram"
5-
)
1+
package histogram
62

73
type DocumentsSet struct {
84
Meta DocumentsSetMeta `json:"meta"`
@@ -20,7 +16,7 @@ func (ds *DocumentsSet) Inflate() {
2016
}
2117

2218
func (ds *DocumentsSet) CreateHistogram(key string) {
23-
hg := histogram.NewHistogram()
19+
hg := NewHistogram()
2420

2521
//histogram := map[string]int{}
2622
for _, doc := range ds.Documents {
@@ -33,17 +29,17 @@ func (ds *DocumentsSet) CreateHistogram(key string) {
3329
}
3430
hg.Inflate()
3531
if ds.Meta.Histograms == nil {
36-
ds.Meta.Histograms = map[string]histogram.Histogram{}
32+
ds.Meta.Histograms = map[string]*Histogram{}
3733
}
3834
ds.Meta.Histograms[key] = hg
3935
}
4036

4137
type DocumentsSetMeta struct {
42-
Count int `json:"count"`
43-
Histograms map[string]histogram.Histogram `json:"histograms"`
38+
Count int `json:"count"`
39+
Histograms map[string]*Histogram `json:"histograms"`
4440
}
4541

4642
func NewDocumentsSetMeta() DocumentsSetMeta {
4743
return DocumentsSetMeta{
48-
Histograms: map[string]histogram.Histogram{}}
44+
Histograms: map[string]*Histogram{}}
4945
}

data/histogram/documents_read.go

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
package histogram
2+
3+
import (
4+
"io"
5+
"io/ioutil"
6+
"os"
7+
8+
"github.com/grokify/simplego/encoding/csvutil"
9+
"github.com/grokify/simplego/encoding/jsonutil"
10+
"github.com/grokify/simplego/type/stringsutil"
11+
)
12+
13+
func ReadMergeFilterCSVFiles(inPaths []string, outPath string, inComma rune, inStripBom bool, andFilter map[string]stringsutil.MatchInfo) (DocumentsSet, error) {
14+
//data := JsonRecordsInfo{Records: []map[string]string{}}
15+
data := NewDocumentsSet()
16+
17+
for _, inPath := range inPaths {
18+
reader, inFile, err := csvutil.NewReader(inPath, inComma, inStripBom)
19+
if err != nil {
20+
return data, err
21+
}
22+
23+
csvHeader := csvutil.CSVHeader{}
24+
j := -1
25+
26+
for {
27+
line, err := reader.Read()
28+
if err == io.EOF {
29+
break
30+
} else if err != nil {
31+
return data, err
32+
}
33+
j++
34+
35+
if j == 0 {
36+
csvHeader.Columns = line
37+
continue
38+
}
39+
match, err := csvHeader.RecordMatch(line, andFilter)
40+
if err != nil {
41+
return data, err
42+
}
43+
if !match {
44+
continue
45+
}
46+
47+
mss := csvHeader.RecordToMSS(line)
48+
data.Documents = append(data.Documents, mss)
49+
}
50+
err = inFile.Close()
51+
if err != nil {
52+
return data, err
53+
}
54+
}
55+
data.Inflate()
56+
return data, nil
57+
}
58+
59+
func MergeFilterCSVFilesToJSON(inPaths []string, outPath string, inComma rune, inStripBom bool, perm os.FileMode, andFilter map[string]stringsutil.MatchInfo) error {
60+
data, err := ReadMergeFilterCSVFiles(inPaths, outPath, inComma, inStripBom, andFilter)
61+
if err != nil {
62+
return err
63+
}
64+
bytes, err := jsonutil.MarshalSimple(data, "", " ")
65+
if err != nil {
66+
return err
67+
}
68+
return ioutil.WriteFile(outPath, bytes, perm)
69+
}

data/histogram/histogram.go

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,8 @@ type Histogram struct {
99
BinsFrequency map[string]int `json:"binsFrequency"`
1010
}
1111

12-
func NewHistogram() Histogram {
13-
return Histogram{BinsFrequency: map[string]int{}}
12+
func NewHistogram() *Histogram {
13+
return &Histogram{BinsFrequency: map[string]int{}}
1414
}
1515

1616
func (h *Histogram) Inflate() {
@@ -26,8 +26,8 @@ func (h *Histogram) Add(bin string, count int) {
2626
}
2727

2828
type HistogramSet struct {
29-
Meta HistogramSetMetadata `json:"meta,omitempty"`
30-
HistogramMap map[string]Histogram `json:"histograms"`
29+
Meta HistogramSetMetadata `json:"meta,omitempty"`
30+
HistogramMap map[string]*Histogram `json:"histograms"`
3131
}
3232

3333
type HistogramSetMetadata struct {
@@ -43,12 +43,12 @@ func NewHistogramSetMetadata() HistogramSetMetadata {
4343
func NewHistogramSet() HistogramSet {
4444
return HistogramSet{
4545
Meta: NewHistogramSetMetadata(),
46-
HistogramMap: map[string]Histogram{}}
46+
HistogramMap: map[string]*Histogram{}}
4747
}
4848

4949
func (hs *HistogramSet) Add(name, bin string, count int) {
5050
if hs.HistogramMap == nil {
51-
hs.HistogramMap = map[string]Histogram{}
51+
hs.HistogramMap = map[string]*Histogram{}
5252
}
5353
if _, ok := hs.HistogramMap[name]; !ok {
5454
hs.HistogramMap[name] = NewHistogram()

data/histogram/read.go

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
package histogram
2+
3+
import (
4+
"fmt"
5+
"strconv"
6+
"strings"
7+
8+
"github.com/grokify/gocharts/data/table"
9+
"github.com/grokify/simplego/type/stringsutil"
10+
)
11+
12+
// ParseFileCSV reads a CSV using default settings of
13+
// `,` separator, header row and BOM to be stripped. If you
14+
// have other configurations, use `table.ReadFile()` directly
15+
// and call `HistogramFromTable()`.
16+
func ParseFileCSV(file string, binNameColIdx, binFrequencyColIdx uint) (*Histogram, error) {
17+
tbl, err := table.ReadFile(file, ',', true, true)
18+
if err != nil {
19+
return nil, err
20+
}
21+
return ParseTable(tbl, binNameColIdx, binFrequencyColIdx)
22+
}
23+
24+
// ParseTable parses a `table.Table` to a `Histogram` given a table,
25+
// binName column index and binFrequency column index. Empty rows are
26+
// skipped.
27+
func ParseTable(tbl table.Table, binNameColIdx, binFrequencyColIdx uint) (*Histogram, error) {
28+
hist := NewHistogram()
29+
for _, rec := range tbl.Records {
30+
if stringsutil.SliceIsEmpty(rec, true) {
31+
continue
32+
}
33+
if int(binNameColIdx) >= len(rec) {
34+
return hist, fmt.Errorf("error row length smaller than binNameColIdx: recordLen[%d] binNameColIdx [%d]",
35+
len(rec), binNameColIdx)
36+
} else if int(binFrequencyColIdx) >= len(rec) {
37+
return hist, fmt.Errorf("error row length smaller than binFrequencyColIdx: recordLen[%d] binFrequencyColIdx [%d]",
38+
len(rec), binFrequencyColIdx)
39+
}
40+
binName := strings.TrimSpace(rec[binNameColIdx])
41+
binFreq := strings.TrimSpace(rec[binFrequencyColIdx])
42+
if len(binName) == 0 && len(binFreq) == 0 {
43+
continue
44+
}
45+
if len(binFreq) == 0 {
46+
hist.Add(binName, 0)
47+
} else {
48+
binFreqInt, err := strconv.Atoi(binFreq)
49+
if err != nil {
50+
return hist, fmt.Errorf("error strconv frequency string[%s] err[%s]", binFreq, err.Error())
51+
}
52+
hist.Add(binName, binFreqInt)
53+
}
54+
}
55+
hist.Inflate()
56+
return hist, nil
57+
}

data/histogram/transform.go

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
package histogram
2+
3+
import (
4+
"strings"
5+
)
6+
7+
// TransformBinNames modifies bin names and returns a new
8+
// histogram.
9+
func TransformBinNames(hist *Histogram, xfFunc func(input string) string) *Histogram {
10+
if hist == nil {
11+
return nil
12+
}
13+
newHist := NewHistogram()
14+
for binName, binFreq := range hist.BinsFrequency {
15+
newHist.Add(xfFunc(binName), binFreq)
16+
}
17+
return newHist
18+
}
19+
20+
// TransformBinNamesByPrefix modifies bin names and returns a new
21+
// histogram.
22+
func TransformBinNamesByPrefix(hist *Histogram, xfMap map[string]string) *Histogram {
23+
if hist == nil {
24+
return nil
25+
}
26+
return TransformBinNames(hist,
27+
func(oldName string) string {
28+
for oldPrefix, newName := range xfMap {
29+
if strings.Index(oldName, oldPrefix) == 0 {
30+
return newName
31+
}
32+
}
33+
return oldName
34+
},
35+
)
36+
}

data/table/read.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,9 @@ import (
66
"fmt"
77
"io"
88
"io/ioutil"
9-
"os"
109
"strings"
1110

1211
"github.com/grokify/simplego/encoding/csvutil"
13-
"github.com/grokify/simplego/encoding/jsonutil"
1412
"github.com/grokify/simplego/type/stringsutil"
1513
"github.com/pkg/errors"
1614
)
@@ -111,6 +109,7 @@ func ReadFile(path string, comma rune, hasHeader, stripBom bool) (Table, error)
111109
return tbl, nil
112110
}
113111

112+
/*
114113
func ReadMergeFilterCSVFiles(inPaths []string, outPath string, inComma rune, inStripBom bool, andFilter map[string]stringsutil.MatchInfo) (DocumentsSet, error) {
115114
//data := JsonRecordsInfo{Records: []map[string]string{}}
116115
data := NewDocumentsSet()
@@ -156,7 +155,8 @@ func ReadMergeFilterCSVFiles(inPaths []string, outPath string, inComma rune, inS
156155
data.Inflate()
157156
return data, nil
158157
}
159-
158+
*/
159+
/*
160160
func MergeFilterCSVFilesToJSON(inPaths []string, outPath string, inComma rune, inStripBom bool, perm os.FileMode, andFilter map[string]stringsutil.MatchInfo) error {
161161
data, err := ReadMergeFilterCSVFiles(inPaths, outPath, inComma, inStripBom, andFilter)
162162
if err != nil {
@@ -168,6 +168,7 @@ func MergeFilterCSVFilesToJSON(inPaths []string, outPath string, inComma rune, i
168168
}
169169
return ioutil.WriteFile(outPath, bytes, perm)
170170
}
171+
*/
171172

172173
func ReadCSVFilesSingleColumnValuesString(files []string, sep string, hasHeader, trimSpace bool, col uint, condenseUniqueSort bool) ([]string, error) {
173174
values := []string{}

go.mod

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,10 @@ go 1.15
44

55
require (
66
github.com/360EntSecGroup-Skylar/excelize v1.4.1
7+
github.com/blend/go-sdk v1.20210518.1 // indirect
78
github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0 // indirect
89
github.com/grokify/elastirad-go v0.0.2
9-
github.com/grokify/simplego v0.0.21
10+
github.com/grokify/simplego v0.26.5
1011
github.com/jessevdk/go-flags v1.5.0
1112
github.com/olekukonko/tablewriter v0.0.5
1213
github.com/pkg/errors v0.9.1

0 commit comments

Comments
 (0)