Skip to content

Commit 9ace3d4

Browse files
authored
Bug: Re-enrichment duplicates work experiences creating the same employments without dates (#1185)
1 parent 8dc988d commit 9ace3d4

File tree

6 files changed

+159
-51
lines changed

6 files changed

+159
-51
lines changed

backend/src/database/migrations/U1690797541__fix-duplicate-work-experiences.sql

Whitespace-only changes.
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
-- One-off cleanup of duplicated work experiences in "memberOrganizations".
--
-- Rows are grouped by ("memberId", "organizationId"). For every group that
-- holds more than one row, the rows whose "dateStart" is NULL are deleted,
-- but only when the same group also contains at least one row that has a
-- "dateStart". Groups made up solely of undated rows are left untouched.
DO $$
DECLARE
    _group       RECORD;
    _item        JSONB;
    _undated_ids UUID[];
    _has_dated   BOOLEAN;
BEGIN
    -- one iteration per (member, organization) pair, largest groups first
    FOR _group IN
        SELECT
            "memberId",
            "organizationId",
            COUNT(*) AS count,
            JSONB_AGG(
                JSON_BUILD_OBJECT(
                    'id', "id",
                    'dateStart', TO_CHAR("dateStart", 'YYYY-MM-DD'),
                    'dateEnd', TO_CHAR("dateEnd", 'YYYY-MM-DD'),
                    'title', "title"
                )
            ) AS experiences
        FROM "memberOrganizations"
        GROUP BY "memberId", "organizationId"
        ORDER BY 3 DESC
    LOOP
        -- a group with a single row cannot contain a duplicate
        IF _group.count > 1 THEN
            _has_dated := FALSE;
            _undated_ids := ARRAY[]::UUID[];

            -- classify every experience of the group as dated or undated
            FOR _item IN
                SELECT value FROM JSONB_ARRAY_ELEMENTS(_group.experiences)
            LOOP
                RAISE NOTICE 'Processing record %, experience: %', _group."memberId", _item;

                IF _item->>'dateStart' IS NOT NULL THEN
                    -- remember that the group has at least one dated row
                    _has_dated := TRUE;
                ELSE
                    -- undated row: candidate for removal
                    _undated_ids := ARRAY_APPEND(_undated_ids, (_item->>'id')::UUID);
                END IF;
            END LOOP;

            -- undated rows are dropped only when a dated row exists for the pair
            IF _has_dated THEN
                RAISE NOTICE 'Deleting experiences: %', _undated_ids;
                DELETE FROM "memberOrganizations" WHERE "id" = ANY (_undated_ids);
            END IF;
        END IF;
    END LOOP;
END;
$$ LANGUAGE PLPGSQL

backend/src/database/repositories/memberEnrichmentCacheRepository.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import { QueryTypes } from 'sequelize'
22
import { EnrichmentCache } from '../../services/premium/enrichment/types/memberEnrichmentTypes'
33
import { IRepositoryOptions } from './IRepositoryOptions'
4+
import SequelizeRepository from './sequelizeRepository'
45

56
class MemberEnrichmentCacheRepository {
67
/**
@@ -18,6 +19,7 @@ class MemberEnrichmentCacheRepository {
1819
options: IRepositoryOptions,
1920
): Promise<EnrichmentCache> {
2021
if (data && Object.keys(data).length > 0) {
22+
const transaction = SequelizeRepository.getTransaction(options)
2123
await options.database.sequelize.query(
2224
`INSERT INTO "memberEnrichmentCache" ("createdAt", "updatedAt", "memberId", "data")
2325
VALUES
@@ -31,6 +33,7 @@ class MemberEnrichmentCacheRepository {
3133
data: JSON.stringify(data),
3234
},
3335
type: QueryTypes.UPSERT,
36+
transaction,
3437
},
3538
)
3639
}
@@ -50,6 +53,7 @@ class MemberEnrichmentCacheRepository {
5053
memberId: string,
5154
options: IRepositoryOptions,
5255
): Promise<EnrichmentCache> {
56+
const transaction = SequelizeRepository.getTransaction(options)
5357
const records = await options.database.sequelize.query(
5458
`select *
5559
from "memberEnrichmentCache"
@@ -60,6 +64,7 @@ class MemberEnrichmentCacheRepository {
6064
memberId,
6165
},
6266
type: QueryTypes.SELECT,
67+
transaction,
6368
},
6469
)
6570

backend/src/database/repositories/memberRepository.ts

Lines changed: 51 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -547,7 +547,23 @@ class MemberRepository {
547547
group by "memberId", platform) mi
548548
group by mi."memberId"),
549549
member_organizations as (
550-
select "memberId", array_agg("organizationId") as orgs
550+
select
551+
"memberId",
552+
JSONB_AGG(
553+
DISTINCT JSONB_BUILD_OBJECT(
554+
'id', "organizationId",
555+
'memberOrganizations',
556+
JSONB_BUILD_OBJECT(
557+
'memberId', "memberId",
558+
'organizationId', "organizationId",
559+
'dateStart', "dateStart",
560+
'dateEnd', "dateEnd",
561+
'createdAt', "createdAt",
562+
'updatedAt', "updatedAt",
563+
'title', title
564+
)
565+
)
566+
) AS orgs
551567
from "memberOrganizations"
552568
where "memberId" = :memberId
553569
group by "memberId"
@@ -571,7 +587,7 @@ class MemberRepository {
571587
m."updatedById",
572588
i.username,
573589
si."segmentIds" as segments,
574-
coalesce(mo.orgs, array []::uuid[]) as "organizations"
590+
coalesce(mo.orgs, '[]'::JSONB) as "organizations"
575591
from members m
576592
inner join identities i on i."memberId" = m.id
577593
inner join segment_ids si on si."memberId" = m.id
@@ -3242,23 +3258,41 @@ class MemberRepository {
32423258
const seq = SequelizeRepository.getSequelize(options)
32433259
const transaction = SequelizeRepository.getTransaction(options)
32443260

3245-
const query = `
3246-
INSERT INTO "memberOrganizations" ("memberId", "organizationId", "createdAt", "updatedAt", "title", "dateStart", "dateEnd")
3247-
VALUES (:memberId, :organizationId, NOW(), NOW(), :title, :dateStart, :dateEnd)
3248-
ON CONFLICT ("memberId", "organizationId", "dateStart", "dateEnd") DO NOTHING
3249-
`
3261+
await seq.query(
3262+
`
3263+
DELETE FROM "memberOrganizations"
3264+
WHERE "memberId" = :memberId
3265+
AND "organizationId" = :organizationId
3266+
AND "dateEnd" IS NULL
3267+
`,
3268+
{
3269+
replacements: {
3270+
memberId,
3271+
organizationId,
3272+
},
3273+
type: QueryTypes.DELETE,
3274+
transaction,
3275+
},
3276+
)
32503277

3251-
await seq.query(query, {
3252-
replacements: {
3253-
memberId,
3254-
organizationId,
3255-
title: title || null,
3256-
dateStart: dateStart || null,
3257-
dateEnd: dateEnd || null,
3278+
await seq.query(
3279+
`
3280+
INSERT INTO "memberOrganizations" ("memberId", "organizationId", "createdAt", "updatedAt", "title", "dateStart", "dateEnd")
3281+
VALUES (:memberId, :organizationId, NOW(), NOW(), :title, :dateStart, :dateEnd)
3282+
ON CONFLICT ("memberId", "organizationId", "dateStart", "dateEnd") DO NOTHING
3283+
`,
3284+
{
3285+
replacements: {
3286+
memberId,
3287+
organizationId,
3288+
title: title || null,
3289+
dateStart: dateStart || null,
3290+
dateEnd: dateEnd || null,
3291+
},
3292+
type: QueryTypes.INSERT,
3293+
transaction,
32583294
},
3259-
type: QueryTypes.INSERT,
3260-
transaction,
3261-
})
3295+
)
32623296
}
32633297

32643298
static sortOrganizations(organizations) {

backend/src/services/memberService.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -326,7 +326,7 @@ export default class MemberService extends LoggerBase {
326326
}
327327
// We findOrCreate the organization and add it to the list of IDs
328328
const organizationRecord = await organizationService.findOrCreate(data)
329-
organizations.push(organizationRecord.id)
329+
organizations.push({ id: organizationRecord.id })
330330
}
331331
}
332332

@@ -349,14 +349,14 @@ export default class MemberService extends LoggerBase {
349349
if (domain) {
350350
const organizationRecord = await organizationService.findByUrl(domain)
351351
if (organizationRecord) {
352-
organizations.push(organizationRecord.id)
352+
organizations.push({ id: organizationRecord.id })
353353
}
354354
}
355355
}
356356
}
357357

358358
// Remove dups
359-
data.organizations = [...new Set(organizations)]
359+
data.organizations = lodash.uniqBy(organizations, 'id')
360360
}
361361

362362
const fillRelations = false

backend/src/services/premium/enrichment/memberEnrichmentService.ts

Lines changed: 47 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ import {
3333
import OrganizationService from '../../organizationService'
3434
import MemberRepository from '../../../database/repositories/memberRepository'
3535
import OrganizationRepository from '../../../database/repositories/organizationRepository'
36+
import SequelizeRepository from '@/database/repositories/sequelizeRepository'
3637

3738
export default class MemberEnrichmentService extends LoggerBase {
3839
options: IServiceOptions
@@ -219,37 +220,48 @@ export default class MemberEnrichmentService extends LoggerBase {
219220
* @returns a promise that resolves to the enrichment data for the member
220221
*/
221222
async enrichOne(memberId) {
222-
// If the attributes have not been fetched yet, fetch them
223-
if (!this.attributes) {
224-
await this.getAttributes()
223+
const transaction = await SequelizeRepository.createTransaction(this.options)
224+
const options = {
225+
...this.options,
226+
transaction,
225227
}
226228

227-
// Create an instance of the MemberService and use it to look up the member
228-
const memberService = new MemberService(this.options)
229-
const member = await memberService.findById(memberId, false, false)
229+
try {
230+
// If the attributes have not been fetched yet, fetch them
231+
if (!this.attributes) {
232+
await this.getAttributes()
233+
}
230234

231-
// If the member's GitHub handle or email address is not available, throw an error
232-
if (!member.username[PlatformType.GITHUB] && member.emails.length === 0) {
233-
throw new Error400(this.options.language, 'enrichment.errors.noGithubHandleOrEmail')
234-
}
235+
// Create an instance of the MemberService and use it to look up the member
236+
const memberService = new MemberService(options)
237+
const member = await memberService.findById(memberId, false, false)
235238

236-
let enrichedFrom = ''
237-
let enrichmentData: EnrichmentAPIMember
238-
// If the member has a GitHub handle, use it to make a request to the Enrichment API
239-
if (member.username[PlatformType.GITHUB]) {
240-
enrichedFrom = 'github'
241-
enrichmentData = await this.getEnrichmentByGithubHandle(
242-
member.username[PlatformType.GITHUB][0],
243-
)
244-
} else if (member.emails.length > 0) {
245-
enrichedFrom = 'email'
246-
// If the member has an email address, use it to make a request to the Enrichment API
247-
enrichmentData = await this.getEnrichmentByEmail(member.emails[0])
248-
}
239+
// If the member's GitHub handle or email address is not available, throw an error
240+
if (!member.username[PlatformType.GITHUB] && member.emails.length === 0) {
241+
throw new Error400(this.options.language, 'enrichment.errors.noGithubHandleOrEmail')
242+
}
243+
244+
let enrichedFrom = ''
245+
let enrichmentData: EnrichmentAPIMember
246+
// If the member has a GitHub handle, use it to make a request to the Enrichment API
247+
if (member.username[PlatformType.GITHUB]) {
248+
enrichedFrom = 'github'
249+
enrichmentData = await this.getEnrichmentByGithubHandle(
250+
member.username[PlatformType.GITHUB][0],
251+
)
252+
} else if (member.emails.length > 0) {
253+
enrichedFrom = 'email'
254+
// If the member has an email address, use it to make a request to the Enrichment API
255+
enrichmentData = await this.getEnrichmentByEmail(member.emails[0])
256+
}
257+
258+
if (!enrichmentData) {
259+
await SequelizeRepository.commitTransaction(transaction)
260+
return null
261+
}
249262

250-
if (enrichmentData) {
251263
// save raw data to cache
252-
await MemberEnrichmentCacheRepository.upsert(memberId, enrichmentData, this.options)
264+
await MemberEnrichmentCacheRepository.upsert(memberId, enrichmentData, options)
253265

254266
const normalized = await this.normalize(member, enrichmentData)
255267

@@ -270,18 +282,18 @@ export default class MemberEnrichmentService extends LoggerBase {
270282
memberId: member.id,
271283
enrichedFrom,
272284
},
273-
this.options,
285+
options,
274286
)
275287

276-
const result = await memberService.upsert({
288+
let result = await memberService.upsert({
277289
...normalized,
278290
platform: Object.keys(member.username)[0],
279291
})
280292

281293
// for every work experience in `enrichmentData`
282294
// - upsert organization
283295
// - upsert `memberOrganization` relation
284-
const organizationService = new OrganizationService(this.options)
296+
const organizationService = new OrganizationService(options)
285297
if (enrichmentData.work_experiences) {
286298
for (const workExperience of enrichmentData.work_experiences) {
287299
const org = await organizationService.findOrCreate({
@@ -299,14 +311,18 @@ export default class MemberEnrichmentService extends LoggerBase {
299311
dateStart: workExperience.startDate,
300312
dateEnd,
301313
}
302-
await MemberRepository.createOrUpdateWorkExperience(data, this.options)
303-
await OrganizationRepository.includeOrganizationToSegments(org.id, this.options)
314+
await MemberRepository.createOrUpdateWorkExperience(data, options)
315+
await OrganizationRepository.includeOrganizationToSegments(org.id, options)
304316
}
305317
}
306318

319+
result = await memberService.findById(result.id, true, false)
320+
await SequelizeRepository.commitTransaction(transaction)
307321
return result
322+
} catch (error) {
323+
await SequelizeRepository.rollbackTransaction(transaction)
324+
throw error
308325
}
309-
return null
310326
}
311327

312328
async normalize(member: Member, enrichmentData: EnrichmentAPIMember) {

0 commit comments

Comments
 (0)