Skip to content

Commit b379845

Browse files
authored
feat: Duplicate Detection: simple view to show + merge potential duplicates (#3877)
closes #3875
1 parent 269bfa4 commit b379845

9 files changed

Lines changed: 588 additions & 0 deletions

src/app/core/config/config.service.ts

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@ import { addDefaultNoteDetailsConfig } from "../../child-dev-project/notes/add-d
66
import { addDefaultTodoViews } from "../../features/todos/add-default-todo-views";
77
import { migrateInheritedFieldConfig } from "../../features/inherited-field/inherited-field-config-migration";
88
import { addDefaultImportViewConfig } from "../import/add-default-import-view";
9+
import { addDefaultReviewDuplicatesViewConfig } from "../../features/de-duplication/add-default-review-duplicates-view";
910
import { EntityDatatype } from "../basic-datatypes/entity/entity.datatype";
1011
import { DefaultValueConfig } from "../default-values/default-value-config";
1112
import { PanelComponent } from "../entity-details/EntityDetailsConfig";
@@ -160,6 +161,7 @@ export class ConfigService extends LatestEntityLoader<Config> {
160161
addDefaultNoteDetailsConfig,
161162
addDefaultTodoViews,
162163
addDefaultImportViewConfig,
164+
addDefaultReviewDuplicatesViewConfig,
163165
];
164166

165167
const newDoc = JSON.parse(JSON.stringify(doc), (_that, rawValue) => {

src/app/core/entity-list/entity-list/entity-list.component.html

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -263,6 +263,20 @@ <h2>{{ title }}</h2>
263263
</button>
264264
}
265265

266+
@if (!entityConstructor.isInternalEntity) {
267+
<button
268+
mat-menu-item
269+
[routerLink]="['/review-duplicates']"
270+
[queryParams]="{ entityType: entityConstructor.ENTITY_TYPE }"
271+
>
272+
<fa-icon
273+
class="standard-icon-with-text color-accent"
274+
icon="copy"
275+
></fa-icon>
276+
<span i18n>Review Possible Duplicates</span>
277+
</button>
278+
}
279+
266280
<ng-content select="[mat-menu-item]"></ng-content>
267281
</mat-menu>
268282

Lines changed: 25 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,25 @@
1+
import { ConfigMigration } from "../../core/config/config-migration";
2+
3+
/**
 * Config migration that registers the default "review duplicates" view.
 *
 * The migration only acts on a config part whose `_id` is "Config:CONFIG_ENTITY"
 * and whose `data` is a plain (non-array) object. Every other config part is
 * returned untouched. A truthy "view:review-duplicates" entry that is already
 * present is left as it is.
 *
 * @param key key of the config part being visited (not used by this migration)
 * @param configPart the visited config part; its `data` is modified in place when the view is added
 * @returns the same `configPart` value that was passed in
 */
export const addDefaultReviewDuplicatesViewConfig: ConfigMigration = (
  key,
  configPart,
) => {
  const VIEW_ID = "view:review-duplicates";
  const viewsById = configPart?.["data"];

  const isMainConfigDoc = configPart?.["_id"] === "Config:CONFIG_ENTITY";
  const hasPlainObjectData =
    Boolean(viewsById) &&
    typeof viewsById === "object" &&
    !Array.isArray(viewsById);

  // Add the view only if the config does not define one under that id yet.
  if (isMainConfigDoc && hasPlainObjectData && !viewsById[VIEW_ID]) {
    viewsById[VIEW_ID] = { component: "ReviewDuplicates", _id: VIEW_ID };
  }

  return configPart;
};

src/app/features/de-duplication/de-duplication-module.ts

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -35,4 +35,11 @@ const dynamicComponents: [string, AsyncComponent][] = [
3535
(c) => c.BulkMergeRecordsComponent,
3636
),
3737
],
38+
[
39+
"ReviewDuplicates",
40+
() =>
41+
import("./review-duplicates/review-duplicates.component").then(
42+
(c) => c.ReviewDuplicatesComponent,
43+
),
44+
],
3845
];
Lines changed: 151 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,151 @@
1+
import { TestBed } from "@angular/core/testing";
2+
import {
3+
mockEntityMapperProvider,
4+
MockEntityMapperService,
5+
} from "../../core/entity/entity-mapper/mock-entity-mapper-service";
6+
import { EntityMapperService } from "../../core/entity/entity-mapper/entity-mapper.service";
7+
import { CoreTestingModule } from "../../utils/core-testing.module";
8+
import { TestEntity } from "../../utils/test-utils/TestEntity";
9+
import { DuplicateDetectionService } from "./duplicate-detection.service";
10+
11+
/**
 * Specs for DuplicateDetectionService.findDuplicates, run against TestEntity
 * records held by the mock entity mapper.
 */
describe("DuplicateDetectionService", () => {
  let service: DuplicateDetectionService;
  let entityMapper: MockEntityMapperService;

  /** Runs the duplicate detection on TestEntity for the given field ids. */
  const detect = (fields: string[]) =>
    service.findDuplicates(TestEntity, fields);

  beforeEach(() => {
    TestBed.configureTestingModule({
      imports: [CoreTestingModule],
      providers: [...mockEntityMapperProvider()],
    });
    service = TestBed.inject(DuplicateDetectionService);
    entityMapper = TestBed.inject(
      EntityMapperService,
    ) as MockEntityMapperService;
  });

  afterEach(() => vi.restoreAllMocks());

  it("should return a pair for two entities matching on a single field (case-insensitive)", async () => {
    const upper = TestEntity.create({ name: "Alice" });
    const lower = TestEntity.create({ name: "alice" });
    entityMapper.addAll([upper, lower]);

    const pairs = await detect(["name"]);

    expect(pairs).toHaveLength(1);
    const [onlyPair] = pairs;
    expect(onlyPair.record).toBe(upper);
    expect(onlyPair.possibleDuplicate).toBe(lower);
  });

  it("should return a pair when names differ only by Unicode whitespace (non-breaking space)", async () => {
    const withNbsp = TestEntity.create({ name: "Alice\u00A0Smith" });
    const withSpace = TestEntity.create({ name: "Alice Smith" });
    entityMapper.addAll([withNbsp, withSpace]);

    const pairs = await detect(["name"]);

    expect(pairs).toHaveLength(1);
  });

  it("should return a pair only when ALL selected fields match", async () => {
    const first = TestEntity.create({ name: "Alice", other: "X" });
    const fullMatch = TestEntity.create({ name: "alice", other: "X" });
    const partialMatch = TestEntity.create({ name: "alice", other: "Y" });
    entityMapper.addAll([first, fullMatch, partialMatch]);

    const pairs = await detect(["name", "other"]);

    expect(pairs).toHaveLength(1);
    const [onlyPair] = pairs;
    expect(onlyPair.record).toBe(first);
    expect(onlyPair.possibleDuplicate).toBe(fullMatch);
  });

  it("should not return a pair when field values differ", async () => {
    entityMapper.addAll([
      TestEntity.create({ name: "Alice" }),
      TestEntity.create({ name: "Bob" }),
    ]);

    const pairs = await detect(["name"]);

    expect(pairs).toHaveLength(0);
  });

  it("should not return a pair when a field value is empty on either entity", async () => {
    entityMapper.addAll([
      TestEntity.create({ name: "" }),
      TestEntity.create({ name: "" }),
    ]);

    const pairs = await detect(["name"]);

    expect(pairs).toHaveLength(0);
  });

  it("should not return a pair when a field value is null or undefined", async () => {
    entityMapper.addAll([TestEntity.create({}), TestEntity.create({})]);

    const pairs = await detect(["name"]);

    expect(pairs).toHaveLength(0);
  });

  it("should return only 1 pair for 3 mutually matching entities (no duplicate rows)", async () => {
    entityMapper.addAll([
      TestEntity.create({ name: "Alice" }),
      TestEntity.create({ name: "alice" }),
      TestEntity.create({ name: "ALICE" }),
    ]);

    const pairs = await detect(["name"]);

    expect(pairs).toHaveLength(1);
  });

  it("should return empty array when there are no entities", async () => {
    const pairs = await detect(["name"]);

    expect(pairs).toHaveLength(0);
  });

  it("should return empty array when no fields are selected", async () => {
    entityMapper.addAll([
      TestEntity.create({ name: "Alice" }),
      TestEntity.create({ name: "Alice" }),
    ]);

    const pairs = await detect([]);

    expect(pairs).toHaveLength(0);
  });

  it("should not treat object-valued fields as duplicates", async () => {
    // Objects without an "id" property, attached outside the typed TestEntity fields.
    const first = Object.assign(TestEntity.create({ name: "Alice" }), {
      metadata: { key: "A" },
    });
    const second = Object.assign(TestEntity.create({ name: "Alice" }), {
      metadata: { key: "B" },
    });
    entityMapper.addAll([first, second]);

    const pairs = await detect(["metadata"]);

    expect(pairs).toHaveLength(0);
  });

  it("should match configurable-enum-like objects by id", async () => {
    // Same "id" but different labels and different names.
    const first = Object.assign(TestEntity.create({ name: "Alice" }), {
      center: { id: "barabazar", label: "Barabazar" },
    });
    const second = Object.assign(TestEntity.create({ name: "Bob" }), {
      center: { id: "barabazar", label: "Bara Bazar" },
    });
    entityMapper.addAll([first, second]);

    const pairs = await detect(["center"]);

    expect(pairs).toHaveLength(1);
    const [onlyPair] = pairs;
    expect(onlyPair.record).toBe(first);
    expect(onlyPair.possibleDuplicate).toBe(second);
  });
});
Lines changed: 84 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,84 @@
1+
import { inject, Injectable } from "@angular/core";
2+
import { EntityMapperService } from "#src/app/core/entity/entity-mapper/entity-mapper.service";
3+
import { Entity, EntityConstructor } from "#src/app/core/entity/model/entity";
4+
5+
/**
 * Two records of the same entity type that were detected as possible
 * duplicates of each other.
 */
export interface DuplicatePair {
  /** The record that comes first in the loaded list of entities. */
  record: Entity;

  /** A later record whose compared field values all match those of `record`. */
  possibleDuplicate: Entity;
}
9+
10+
/**
 * Detects records of one entity type that are likely duplicates of each other
 * by comparing a selected set of fields after normalizing their values.
 *
 * Two records match when, for every selected field, both normalized values are
 * non-empty and equal. Normalization is case-insensitive, Unicode-normalized
 * (NFKC) and whitespace-insensitive (runs of whitespace collapse to one space).
 */
@Injectable({
  providedIn: "root",
})
export class DuplicateDetectionService {
  private readonly entityMapper = inject(EntityMapperService);

  /**
   * Finds pairs of possible duplicates among the records returned by
   * `entityMapper.loadType` for the given entity type.
   *
   * Every record is part of at most one pair: records with identical values
   * are paired up in list order (1st with 2nd, 3rd with 4th, ...), so three
   * mutually matching records produce a single pair. Pairs are ordered by the
   * list position of their `record`.
   *
   * @param entityConstructor entity type whose records are compared
   * @param fields ids of the fields that all have to match; an empty list yields no pairs
   * @returns the detected pairs, or an empty array if nothing matches
   */
  async findDuplicates(
    entityConstructor: EntityConstructor,
    fields: string[],
  ): Promise<DuplicatePair[]> {
    if (fields.length === 0) {
      return [];
    }

    const entities: Entity[] =
      await this.entityMapper.loadType(entityConstructor);

    // Each record is normalized exactly once; matching records share the same key.
    // The map holds, per key, the earliest record that is still waiting for a partner.
    const waitingByKey = new Map<string, { index: number; entity: Entity }>();
    const found: { index: number; pair: DuplicatePair }[] = [];

    entities.forEach((entity, index) => {
      const key = this.buildMatchKey(entity, fields);
      if (key === undefined) {
        return;
      }

      const earlier = waitingByKey.get(key);
      if (earlier === undefined) {
        waitingByKey.set(key, { index, entity });
        return;
      }

      // Both records are now used up; a third match starts a new pair.
      waitingByKey.delete(key);
      found.push({
        index: earlier.index,
        pair: { record: earlier.entity, possibleDuplicate: entity },
      });
    });

    // Pairs are discovered when their second record is reached;
    // report them in the order of their first record instead.
    return found
      .sort((left, right) => left.index - right.index)
      .map((entry) => entry.pair);
  }

  /**
   * Builds the comparison key of a record for the selected fields.
   *
   * @returns a key that is equal for exactly those records whose normalized
   *   values agree on all fields, or `undefined` if any of the fields has no
   *   comparable value (such a record never matches anything)
   */
  private buildMatchKey(entity: Entity, fields: string[]): string | undefined {
    const values: string[] = [];
    for (const field of fields) {
      const normalized = this.normalizeValue(entity[field]);
      if (normalized === "") {
        return undefined;
      }
      values.push(normalized);
    }
    // JSON encoding keeps the key unambiguous, whatever characters the values contain.
    return JSON.stringify(values);
  }

  /**
   * Converts a field value into its comparable text form.
   *
   * @returns the normalized text, or "" if the value is missing or of a kind
   *   that is not compared (arrays, invalid dates, objects without a primitive
   *   `id`, functions, symbols, bigints)
   */
  private normalizeValue(value: unknown): string {
    if (value == null) return "";

    if (value instanceof Date) {
      // toISOString() throws a RangeError for invalid dates, which would abort
      // the whole detection run; treat such a value as "nothing to compare".
      return Number.isNaN(value.getTime())
        ? ""
        : value.toISOString().toLowerCase();
    }

    // Arrays are intentionally excluded in this first exact-match implementation.
    // Selecting an array field will therefore not yield duplicate matches.
    if (Array.isArray(value)) return "";

    if (typeof value === "object") {
      // Objects are compared by their "id" property only, never by other properties.
      const idValue = (value as Record<string, unknown>)["id"];
      return this.isComparablePrimitive(idValue)
        ? this.normalizeText(String(idValue))
        : "";
    }

    return this.isComparablePrimitive(value)
      ? this.normalizeText(String(value))
      : "";
  }

  /** Checks whether the value is a string, number or boolean. */
  private isComparablePrimitive(
    value: unknown,
  ): value is string | number | boolean {
    return (
      typeof value === "string" ||
      typeof value === "number" ||
      typeof value === "boolean"
    );
  }

  /** Applies NFKC normalization, whitespace collapsing, trimming and lower-casing. */
  private normalizeText(text: string): string {
    return text.normalize("NFKC").replace(/\s+/g, " ").trim().toLowerCase();
  }
}

0 commit comments

Comments
 (0)