Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 1 addition & 16 deletions backend/src/services/aws.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import AWS, { SQS } from 'aws-sdk'
import { trimUtf8ToMaxByteLength } from '@crowd/common'
import { COMPREHEND_CONFIG, IS_DEV_ENV, KUBE_MODE, S3_CONFIG, SQS_CONFIG } from '../conf'

let sqsInstance
Expand Down Expand Up @@ -100,22 +101,6 @@ if (KUBE_MODE) {
: undefined
}

/**
 * Trims a string so that its UTF-8 encoding fits into `maxByteLength` bytes.
 *
 * The result is always a prefix of the input. Characters are dropped whole: a
 * multi-byte character that does not fit completely is removed, and a surrogate
 * pair (e.g. an emoji) is never cut in half, so trimming never produces a lone
 * surrogate that the input did not already contain.
 *
 * @param utf8Str - the string to trim
 * @param maxByteLength - maximum allowed size of the result in UTF-8 bytes;
 *   zero or a negative value yields an empty string
 * @returns `utf8Str` unchanged when it already fits, otherwise its longest
 *   prefix of whole characters that fits into `maxByteLength` bytes
 */
const trimUtf8ToMaxByteLength = (utf8Str: string, maxByteLength: number): string => {
  // a single UTF-16 code unit never needs more than 3 UTF-8 bytes,
  // so a short enough string is guaranteed to fit without measuring it
  if (utf8Str.length * 3 <= maxByteLength) {
    return utf8Str
  }

  let usedBytes = 0
  let end = 0

  // single pass: add up the encoded size of each character and stop before the first one that overflows
  while (end < utf8Str.length) {
    const unit = utf8Str.charCodeAt(end)
    let unitCount = 1
    let byteCount = 3 // default: 3-byte BMP character, or a lone surrogate (encoded as U+FFFD)

    if (unit < 0x80) {
      byteCount = 1
    } else if (unit < 0x800) {
      byteCount = 2
    } else if (unit >= 0xd800 && unit <= 0xdbff && end + 1 < utf8Str.length) {
      const next = utf8Str.charCodeAt(end + 1)
      if (next >= 0xdc00 && next <= 0xdfff) {
        // valid surrogate pair: one 4-byte character that must be kept or dropped as a whole
        byteCount = 4
        unitCount = 2
      }
    }

    if (usedBytes + byteCount > maxByteLength) {
      break
    }

    usedBytes += byteCount
    end += unitCount
  }

  return utf8Str.slice(0, end)
}

// Byte-length cap (5000) for text handled in this module.
// NOTE(review): presumably the maximum input size accepted by AWS Comprehend
// sentiment detection — confirm against where this constant is used.
const ALLOWED_MAX_BYTE_LENGTH = 5000

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import { SERVICE_CONFIG } from '@/conf'
import { IDbMemberSyncData } from '@/repo/member.data'
import { MemberRepository } from '@/repo/member.repo'
import { OpenSearchIndex } from '@/types'
import { distinct, distinctBy, groupBy } from '@crowd/common'
import { distinct, distinctBy, groupBy, trimUtf8ToMaxByteLength } from '@crowd/common'
import { DbStore } from '@crowd/database'
import { Logger, LoggerBase, logExecutionTime } from '@crowd/logging'
import { RedisClient } from '@crowd/redis'
Expand Down Expand Up @@ -433,21 +433,29 @@ export class MemberSyncService extends LoggerBase {
p.keyword_displayName = data.displayName
const p_attributes = {}

// max byte length that can be indexed in OpenSearch
const maxByteLength = 25000

for (const attribute of attributes) {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const attData = data.attributes as any

if (attribute.name in attData) {
if (attribute.type === MemberAttributeType.SPECIAL) {
const data = JSON.stringify(attData[attribute.name])
let data = JSON.stringify(attData[attribute.name])
data = trimUtf8ToMaxByteLength(data, maxByteLength)
p_attributes[`string_${attribute.name}`] = data
} else {
const p_data = {}
const defValue = attData[attribute.name].default
const prefix = this.attributeTypeToOpenSearchPrefix(defValue, attribute.type)

for (const key of Object.keys(attData[attribute.name])) {
p_data[`${prefix}_${key}`] = attData[attribute.name][key]
let value = attData[attribute.name][key]
if (attribute.type === MemberAttributeType.STRING) {
value = trimUtf8ToMaxByteLength(value, maxByteLength)
}
p_data[`${prefix}_${key}`] = value
}

p_attributes[`obj_${attribute.name}`] = p_data
Expand Down
15 changes: 15 additions & 0 deletions services/libs/common/src/byteLength.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
/**
 * Trims a string so that its UTF-8 encoding fits into `maxByteLength` bytes.
 *
 * The result is always a prefix of the input. Characters are dropped whole: a
 * multi-byte character that does not fit completely is removed, and a surrogate
 * pair (e.g. an emoji) is never cut in half, so trimming never produces a lone
 * surrogate that the input did not already contain.
 *
 * @param utf8Str - the string to trim
 * @param maxByteLength - maximum allowed size of the result in UTF-8 bytes;
 *   zero or a negative value yields an empty string
 * @returns `utf8Str` unchanged when it already fits, otherwise its longest
 *   prefix of whole characters that fits into `maxByteLength` bytes
 */
export const trimUtf8ToMaxByteLength = (utf8Str: string, maxByteLength: number): string => {
  // a single UTF-16 code unit never needs more than 3 UTF-8 bytes,
  // so a short enough string is guaranteed to fit without measuring it
  if (utf8Str.length * 3 <= maxByteLength) {
    return utf8Str
  }

  let usedBytes = 0
  let end = 0

  // single pass: add up the encoded size of each character and stop before the first one that overflows
  while (end < utf8Str.length) {
    const unit = utf8Str.charCodeAt(end)
    let unitCount = 1
    let byteCount = 3 // default: 3-byte BMP character, or a lone surrogate (encoded as U+FFFD)

    if (unit < 0x80) {
      byteCount = 1
    } else if (unit < 0x800) {
      byteCount = 2
    } else if (unit >= 0xd800 && unit <= 0xdbff && end + 1 < utf8Str.length) {
      const next = utf8Str.charCodeAt(end + 1)
      if (next >= 0xdc00 && next <= 0xdfff) {
        // valid surrogate pair: one 4-byte character that must be kept or dropped as a whole
        byteCount = 4
        unitCount = 2
      }
    }

    if (usedBytes + byteCount > maxByteLength) {
      break
    }

    usedBytes += byteCount
    end += unitCount
  }

  return utf8Str.slice(0, end)
}
1 change: 1 addition & 0 deletions services/libs/common/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@ export * from './strings'
export * from './types'
export * from './requestThrottler'
export * from './rawQueryParser'
export * from './byteLength'
17 changes: 1 addition & 16 deletions services/libs/sentiment/src/sentiment.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import { IS_DEV_ENV } from '@crowd/common'
import { getServiceChildLogger } from '@crowd/logging'
import { getComprehendClient } from './client'
import { ISentimentAnalysisResult, ISentimentClientConfig } from './types'
import { trimUtf8ToMaxByteLength } from '@crowd/common'

const log = getServiceChildLogger('sentiment')

Expand Down Expand Up @@ -134,22 +135,6 @@ const mapResult = (result: DetectSentimentResponse): ISentimentAnalysisResult =>
}
}

/**
 * Trims a string so that its UTF-8 encoding fits into `maxByteLength` bytes.
 *
 * The result is always a prefix of the input. Characters are dropped whole: a
 * multi-byte character that does not fit completely is removed, and a surrogate
 * pair (e.g. an emoji) is never cut in half, so trimming never produces a lone
 * surrogate that the input did not already contain.
 *
 * @param utf8Str - the string to trim
 * @param maxByteLength - maximum allowed size of the result in UTF-8 bytes;
 *   zero or a negative value yields an empty string
 * @returns `utf8Str` unchanged when it already fits, otherwise its longest
 *   prefix of whole characters that fits into `maxByteLength` bytes
 */
const trimUtf8ToMaxByteLength = (utf8Str: string, maxByteLength: number): string => {
  // a single UTF-16 code unit never needs more than 3 UTF-8 bytes,
  // so a short enough string is guaranteed to fit without measuring it
  if (utf8Str.length * 3 <= maxByteLength) {
    return utf8Str
  }

  let usedBytes = 0
  let end = 0

  // single pass: add up the encoded size of each character and stop before the first one that overflows
  while (end < utf8Str.length) {
    const unit = utf8Str.charCodeAt(end)
    let unitCount = 1
    let byteCount = 3 // default: 3-byte BMP character, or a lone surrogate (encoded as U+FFFD)

    if (unit < 0x80) {
      byteCount = 1
    } else if (unit < 0x800) {
      byteCount = 2
    } else if (unit >= 0xd800 && unit <= 0xdbff && end + 1 < utf8Str.length) {
      const next = utf8Str.charCodeAt(end + 1)
      if (next >= 0xdc00 && next <= 0xdfff) {
        // valid surrogate pair: one 4-byte character that must be kept or dropped as a whole
        byteCount = 4
        unitCount = 2
      }
    }

    if (usedBytes + byteCount > maxByteLength) {
      break
    }

    usedBytes += byteCount
    end += unitCount
  }

  return utf8Str.slice(0, end)
}

// Byte-length cap (5000) for text handled in this module.
// NOTE(review): presumably the maximum input size accepted by AWS Comprehend
// sentiment detection — confirm against where this constant is used.
const ALLOWED_MAX_BYTE_LENGTH = 5000

const prepareText = (text: string): string => {
Expand Down