Skip to content

Commit 3be0d38

Browse files
authored
Existing website on enriching organizations won't use enrichment data points (#1758)
1 parent fc206f4 commit 3be0d38

File tree

6 files changed

+492
-8
lines changed

6 files changed

+492
-8
lines changed

backend/package.json

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,9 @@
4242
"script:enrich-members-organizations": "SERVICE=script TS_NODE_TRANSPILE_ONLY=true node -r tsconfig-paths/register -r ts-node/register src/bin/scripts/enrich-members-and-organizations.ts",
4343
"script:enrich-organizations": "SERVICE=script TS_NODE_TRANSPILE_ONLY=true node -r tsconfig-paths/register -r ts-node/register src/bin/scripts/enrich-organizations-synchronous.ts",
4444
"script:generate-merge-suggestions": "SERVICE=script TS_NODE_TRANSPILE_ONLY=true node -r tsconfig-paths/register -r ts-node/register src/bin/scripts/generate-merge-suggestions.ts",
45-
"script:merge-organizations": "SERVICE=script TS_NODE_TRANSPILE_ONLY=true node -r tsconfig-paths/register -r ts-node/register src/bin/scripts/merge-organizations.ts"
45+
"script:merge-organizations": "SERVICE=script TS_NODE_TRANSPILE_ONLY=true node -r tsconfig-paths/register -r ts-node/register src/bin/scripts/merge-organizations.ts",
46+
"script:get-member-enrichment-data": "SERVICE=script TS_NODE_TRANSPILE_ONLY=true node -r tsconfig-paths/register -r ts-node/register src/bin/scripts/get-member-enrichment-data.ts",
47+
"script:get-organization-enrichment-data": "SERVICE=script TS_NODE_TRANSPILE_ONLY=true node -r tsconfig-paths/register -r ts-node/register src/bin/scripts/get-organization-enrichment-data.ts"
4648
},
4749
"dependencies": {
4850
"@aws-sdk/client-comprehend": "^3.159.0",
Lines changed: 325 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,325 @@
1+
import commandLineArgs from 'command-line-args'
2+
import commandLineUsage from 'command-line-usage'
3+
import * as fs from 'fs'
4+
import path from 'path'
5+
import { QueryTypes } from 'sequelize'
6+
import { databaseInit } from '@/database/databaseConnection'
7+
8+
/* eslint-disable no-console */
9+
10+
// ASCII banner printed at the top of the usage screen; read from the file next to this script.
const bannerPath = path.join(__dirname, 'banner.txt')
const banner = fs.readFileSync(bannerPath, 'utf8')

// The same sentence is used both as the section header and as its body text.
const scriptSummary =
  'Fix wrongly merged orgs using snapshot data - This is a development script, never run it in k8s context!'

// The only supported flag: -h / --help.
const helpOption = {
  name: 'help',
  alias: 'h',
  type: Boolean,
  description: 'Print this usage guide.',
}

const options = [helpOption]

// Sections rendered by command-line-usage, in display order.
const bannerSection = { content: banner, raw: true }
const summarySection = { header: scriptSummary, content: scriptSummary }
const optionsSection = { header: 'Options', optionList: options }

const sections = [bannerSection, summarySection, optionsSection]

// Build the help text first, then parse the arguments actually passed on the command line.
const usage = commandLineUsage(sections)
const parameters = commandLineArgs(options)
38+
39+
/**
 * Builds the named replacements for re-inserting an organization row taken from the snapshot db.
 *
 * Every column of the row is passed through as-is, except:
 *  - the columns listed in `jsonColumns`, which are serialized with JSON.stringify
 *    (or set to null when the value is falsy);
 *  - the columns listed in `nulledColumns`, which are always inserted as null.
 *
 * @param row organization row as returned by `select * from organizations`
 * @returns object usable as `replacements` for the insert statement below
 */
function buildOrganizationReplacements(row) {
  const jsonColumns = [
    'twitter',
    'linkedin',
    'crunchbase',
    'revenueRange',
    'github',
    'employeeCountByCountry',
    'naics',
    'address',
    'attributes',
    'averageTenureByLevel',
    'averageTenureByRole',
    'employeeChurnRate',
    'employeeCountByMonth',
    'employeeGrowthRate',
    'employeeCountByMonthByLevel',
    'employeeCountByMonthByRole',
    'grossAdditionsByMonth',
    'grossDeparturesByMonth',
    'weakIdentities',
  ]
  const nulledColumns = [
    'directSubsidiaries',
    'affiliatedProfiles',
    'allSubsidiaries',
    'alternativeDomains',
    'alternativeNames',
    'profiles',
  ]

  const replacements = { ...row }
  jsonColumns.forEach((column) => {
    replacements[column] = row[column] ? JSON.stringify(row[column]) : null
  })
  nulledColumns.forEach((column) => {
    replacements[column] = null
  })
  return replacements
}

if (parameters.help) {
  console.log(usage)
} else {
  setImmediate(async () => {
    // Hard-coded scope of this one-off fix.
    const tenantId = 'ad9a1d40-238d-488d-9433-69752a110550'
    const segmentId = '86eb7dac-57d6-40aa-b034-37f1ff4b0ddb'

    const snapshotDb = null // we should get a connection to the snapshot db to get the destroyed data
    const badIdentities = [] // bad identities in the organization should be populated here before running

    // Only the first identity is processed per run.
    const identityToProcess = badIdentities[0]

    // Fail with a readable message instead of a TypeError when the placeholders were not filled in.
    if (!snapshotDb || !identityToProcess) {
      console.error(
        'Nothing to do: set snapshotDb and populate badIdentities in this script before running it.',
      )
      process.exit(1)
      return
    }

    let tx
    let exitCode = 0

    try {
      const prodDb = await databaseInit()

      // find organizationId from snapshot db using identity
      // (named replacements so that names containing quotes do not break the statement)
      const orgs = await snapshotDb.sequelize.query(
        `select "organizationId" from "organizationIdentities" oi
         where oi.name = :name and oi.platform = :platform and "tenantId" = :tenantId`,
        {
          replacements: {
            name: identityToProcess.name,
            platform: identityToProcess.platform,
            tenantId,
          },
          type: QueryTypes.SELECT,
          useMaster: true,
        },
      )

      const deletedOrganizationId = orgs[0]?.organizationId

      if (deletedOrganizationId && deletedOrganizationId !== identityToProcess.organizationId) {
        // get the full organization row
        const snapshotOrgRows = await snapshotDb.sequelize.query(
          `select * from organizations where id = :deletedOrganizationId`,
          {
            replacements: {
              deletedOrganizationId,
            },
            type: QueryTypes.SELECT,
            useMaster: true,
          },
        )
        const snapshotOrg = snapshotOrgRows[0]

        console.log(snapshotOrg)

        // create a transaction
        tx = await prodDb.sequelize.transaction()

        // CHECK IF ORG ALREADY EXISTS?
        const existing = await prodDb.sequelize.query(
          `
          select id from organizations o
          where o.id = :deletedOrganizationId`,
          {
            replacements: {
              deletedOrganizationId,
            },
            type: QueryTypes.SELECT,
            transaction: tx,
          },
        )

        if (existing.length === 0) {
          if (!snapshotOrg) {
            throw new Error(
              `Organization ${deletedOrganizationId} was not found in the snapshot db, cannot restore it`,
            )
          }

          // create the merged organization
          await prodDb.sequelize.query(
            `INSERT INTO organizations (
              id,
              description,
              "emails",
              "phoneNumbers",
              logo,
              tags,
              twitter,
              linkedin,
              crunchbase,
              employees,
              "revenueRange",
              "importHash",
              "createdAt",
              "updatedAt",
              "deletedAt",
              "tenantId",
              "createdById",
              "updatedById",
              location,
              github,
              website,
              "isTeamOrganization",
              "lastEnrichedAt",
              "employeeCountByCountry",
              type, "geoLocation", size, ticker, headline, profiles, naics,
              address, industry, founded, "displayName", attributes,
              "searchSyncedAt", "manuallyCreated", "affiliatedProfiles",
              "allSubsidiaries", "alternativeDomains", "alternativeNames",
              "averageEmployeeTenure", "averageTenureByLevel", "averageTenureByRole",
              "directSubsidiaries", "employeeChurnRate", "employeeCountByMonth",
              "employeeGrowthRate", "employeeCountByMonthByLevel", "employeeCountByMonthByRole",
              "gicsSector", "grossAdditionsByMonth", "grossDeparturesByMonth",
              "ultimateParent", "immediateParent", "weakIdentities")
            VALUES (
              :id, :description, :emails, :phoneNumbers, :logo,
              :tags, :twitter, :linkedin, :crunchbase, :employees, :revenueRange,
              :importHash, :createdAt, :updatedAt, :deletedAt, :tenantId,
              :createdById, :updatedById, :location, :github, :website,
              :isTeamOrganization, :lastEnrichedAt, :employeeCountByCountry,
              :type, :geoLocation, :size, :ticker, :headline, :profiles, :naics,
              :address, :industry, :founded, :displayName, :attributes,
              :searchSyncedAt, :manuallyCreated, :affiliatedProfiles,
              :allSubsidiaries, :alternativeDomains, :alternativeNames,
              :averageEmployeeTenure, :averageTenureByLevel, :averageTenureByRole,
              :directSubsidiaries, :employeeChurnRate, :employeeCountByMonth,
              :employeeGrowthRate, :employeeCountByMonthByLevel, :employeeCountByMonthByRole,
              :gicsSector, :grossAdditionsByMonth, :grossDeparturesByMonth,
              :ultimateParent, :immediateParent, :weakIdentities)`,
            {
              replacements: buildOrganizationReplacements(snapshotOrg),
              type: QueryTypes.INSERT,
              transaction: tx,
            },
          )
        }

        // update identity to belong to found org
        await prodDb.sequelize.query(
          `
          update "organizationIdentities"
          set "organizationId" = :deletedOrganizationId
          where
            name = :name
            and platform = :platform
            and "organizationId" = :oldOrganizationId
            and "tenantId" = :tenantId`,
          {
            replacements: {
              name: identityToProcess.name,
              platform: identityToProcess.platform,
              oldOrganizationId: identityToProcess.organizationId,
              deletedOrganizationId,
              tenantId,
            },
            type: QueryTypes.UPDATE,
            transaction: tx,
          },
        )

        // find memberOrganization rows that belonged to deleted org
        const memberOrganizationRows = await snapshotDb.sequelize.query(
          `
          select id from "memberOrganizations" mo
          where mo."organizationId" = :deletedOrganizationId;`,
          {
            replacements: {
              deletedOrganizationId,
            },
            type: QueryTypes.SELECT,
          },
        )

        // Obtain only the id's from the resulting objects array
        const idsBelongToMergedOrg = memberOrganizationRows.map((res) => res.id)

        // memberOrganization row that the deleted org has in snapshot db, update memberOrganization rows in the prod db
        // (skipped when empty: an empty list would render as the invalid SQL `in ()`)
        if (idsBelongToMergedOrg.length > 0) {
          await prodDb.sequelize.query(
            `
            update "memberOrganizations"
            set "organizationId" = :deletedOrganizationId
            where
              id in (:idsBelongToMergedOrg)`,
            {
              replacements: {
                deletedOrganizationId,
                idsBelongToMergedOrg,
              },
              type: QueryTypes.UPDATE,
              transaction: tx,
            },
          )
        }

        // find distinct memberIds belong to deleted organization
        const memberRows = await snapshotDb.sequelize.query(
          `
          select distinct mo."memberId" from "memberOrganizations" mo
          where mo."organizationId" = :deletedOrganizationId;`,
          {
            replacements: {
              deletedOrganizationId,
            },
            type: QueryTypes.SELECT,
          },
        )

        const memberIds = memberRows.map((res) => res.memberId)

        // update activity organizations that belong to these members
        // NOTE(review): this rewrites every activity of these members, whatever organization
        // it currently points to - confirm that is intended before running.
        if (memberIds.length > 0) {
          await prodDb.sequelize.query(
            `
            update "activities"
            set "organizationId" = :deletedOrganizationId
            where
              "memberId" in (:memberIds)`,
            {
              replacements: {
                deletedOrganizationId,
                memberIds,
              },
              type: QueryTypes.UPDATE,
              transaction: tx,
            },
          )
        }

        // add restored organization to organizationSegments again
        await prodDb.sequelize.query(
          `
          insert into "organizationSegments" ("organizationId", "segmentId", "tenantId", "createdAt")
          values (:deletedOrganizationId, :segmentId, :tenantId, now())
          on conflict do nothing`,
          {
            replacements: {
              deletedOrganizationId,
              segmentId,
              tenantId,
            },
            type: QueryTypes.INSERT,
            transaction: tx,
          },
        )

        await tx.commit()
        console.log(
          `Organization ${deletedOrganizationId} unmerged from organization ${identityToProcess.organizationId}`,
        )
      } else {
        console.log(
          `Nothing to unmerge for identity ${identityToProcess.name} (${identityToProcess.platform})`,
        )
      }
    } catch (e) {
      // Report failure through the exit code so callers do not mistake a rollback for success.
      exitCode = 1
      console.log(e)
      if (tx) {
        console.log('Rolling back transaction!')
        try {
          await tx.rollback()
        } catch (rollbackError) {
          // e.g. the transaction was already finished; log it and still exit in a controlled way
          console.log(rollbackError)
        }
      }
    }

    process.exit(exitCode)
  })
}

0 commit comments

Comments
 (0)