-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathparental_consolidation.go
More file actions
110 lines (101 loc) · 2.23 KB
/
parental_consolidation.go
File metadata and controls
110 lines (101 loc) · 2.23 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
/*
File: parental_consolidation.go
Description:
Parent-domain consolidation and deduplication operations.
Extracted from parental_categories.go to group heavy tree-optimization algorithms.
*/
package main
import "strings"
// ---------------------------------------------------------------------------
// Parent-domain consolidation
// ---------------------------------------------------------------------------
// consolidateParentDomains adds parent domains to apex when enough of their
// direct children agree on a category, and returns how many parents were added.
//
// apex maps a domain name to its category and is modified in place; child
// entries are never removed here. A domain's "parent" is everything after its
// first dot. Each round works as follows:
//
//  1. Every domain whose parent is not already a key in apex casts one vote
//     for its own category on that parent.
//  2. A parent is skipped if it contains no dot, or if isPublicSuffix or
//     isSharedHostingDomain (both defined elsewhere in the package) reports
//     true for it.
//  3. A parent is added when it has at least threshold voting children and
//     the most common category accounts for at least homogeneityPct percent
//     of them (integer division, so fractions are truncated).
//
// Rounds repeat until one adds nothing, which lets freshly added parents vote
// for their own parents in the following round.
//
// A threshold <= 0 disables consolidation (returns 0). A homogeneityPct
// outside 1..100 falls back to 90.
//
// When two categories tie for the most votes, the lexicographically smallest
// category name wins, so the result does not depend on map iteration order.
func consolidateParentDomains(apex map[string]string, threshold, homogeneityPct int) int {
	if threshold <= 0 {
		return 0
	}
	if homogeneityPct <= 0 || homogeneityPct > 100 {
		homogeneityPct = 90
	}

	total := 0
	// votes[parent][category] = number of direct children in that category.
	// The outer map is reused across rounds to avoid reallocating it.
	votes := make(map[string]map[string]int)
	for {
		for k := range votes {
			delete(votes, k)
		}

		// Tally phase: apex is only read here, never written.
		for d, cat := range apex {
			idx := strings.IndexByte(d, '.')
			if idx < 0 {
				continue
			}
			parent := d[idx+1:]
			if _, exists := apex[parent]; exists {
				// Parent is already categorised; its children do not vote.
				continue
			}
			counts := votes[parent]
			if counts == nil {
				counts = make(map[string]int)
				votes[parent] = counts
			}
			counts[cat]++
		}

		// Decision phase: writes to apex are safe because we range over votes.
		added := 0
		for parent, catCounts := range votes {
			if strings.IndexByte(parent, '.') < 0 {
				// Single-label parents (e.g. a bare TLD) are never consolidated.
				continue
			}
			if isPublicSuffix(parent) {
				continue
			}
			if isSharedHostingDomain(parent) {
				continue
			}

			childCount := 0
			for _, n := range catCounts {
				childCount += n
			}
			if childCount < threshold {
				continue
			}

			winCat, winCount := "", 0
			for cat, n := range catCounts {
				// Ties are resolved by name so the outcome is reproducible
				// regardless of the randomised map iteration order.
				if n > winCount || (n == winCount && cat < winCat) {
					winCat, winCount = cat, n
				}
			}
			if winCount*100/childCount < homogeneityPct {
				continue
			}

			apex[parent] = winCat
			added++
		}

		if added == 0 {
			break
		}
		total += added
	}
	return total
}
// ---------------------------------------------------------------------------
// Apex deduplication
// ---------------------------------------------------------------------------
// dedupeApex deletes entries from apex that are made redundant by an ancestor
// domain carrying the same category, and returns the number of entries deleted.
//
// For each domain, leading labels are stripped one at a time until the first
// ancestor that is itself a key in apex is found. If that nearest ancestor has
// the same category, the domain is dropped; if it has a different category the
// domain is kept, and ancestors further up are not consulted.
//
// All decisions are made against the unmodified map; deletions are applied
// only after the scan completes.
func dedupeApex(apex map[string]string) int {
	var redundant []string
	for domain, category := range apex {
		rest := domain
		for dot := strings.IndexByte(rest, '.'); dot >= 0; dot = strings.IndexByte(rest, '.') {
			rest = rest[dot+1:]
			ancestorCat, found := apex[rest]
			if !found {
				continue
			}
			// Only the nearest existing ancestor decides.
			if ancestorCat == category {
				redundant = append(redundant, domain)
			}
			break
		}
	}

	dropped := len(redundant)
	for _, domain := range redundant {
		delete(apex, domain)
	}
	return dropped
}