From 3858c7e7d420080b7ada608e29b49e558d4a2bb2 Mon Sep 17 00:00:00 2001 From: Mouad BANI Date: Thu, 22 Jan 2026 15:53:20 +0100 Subject: [PATCH 01/17] chore: db migration to switch maintainersRepo reference to public.repositories --- ...368__changeMaintainersRepoReferenceToRepositories.sql | 0 ...368__changeMaintainersRepoReferenceToRepositories.sql | 9 +++++++++ 2 files changed, 9 insertions(+) create mode 100644 backend/src/database/migrations/U1769092368__changeMaintainersRepoReferenceToRepositories.sql create mode 100644 backend/src/database/migrations/V1769092368__changeMaintainersRepoReferenceToRepositories.sql diff --git a/backend/src/database/migrations/U1769092368__changeMaintainersRepoReferenceToRepositories.sql b/backend/src/database/migrations/U1769092368__changeMaintainersRepoReferenceToRepositories.sql new file mode 100644 index 0000000000..e69de29bb2 diff --git a/backend/src/database/migrations/V1769092368__changeMaintainersRepoReferenceToRepositories.sql b/backend/src/database/migrations/V1769092368__changeMaintainersRepoReferenceToRepositories.sql new file mode 100644 index 0000000000..c9c86fa3f5 --- /dev/null +++ b/backend/src/database/migrations/V1769092368__changeMaintainersRepoReferenceToRepositories.sql @@ -0,0 +1,9 @@ + +ALTER TABLE "maintainersInternal" +DROP CONSTRAINT IF EXISTS "maintainersInternal_repoId_fkey"; + +ALTER TABLE "maintainersInternal" +ADD CONSTRAINT "maintainersInternal_repoId_fkey" +FOREIGN KEY ("repoId") +REFERENCES repositories(id) +ON DELETE CASCADE; \ No newline at end of file From 7c6e8d3fcec2c6d4e128e75703d2f490e64a7281 Mon Sep 17 00:00:00 2001 From: Mouad BANI Date: Thu, 22 Jan 2026 15:59:52 +0100 Subject: [PATCH 02/17] chore: deprecate removeGitHubRepoMapping to delete from githubRepos --- .../src/activities/nangoActivities.ts | 10 -------- .../src/workflows/syncGithubIntegration.ts | 2 +- .../src/integrations/index.ts | 24 ------------------- 3 files changed, 1 insertion(+), 35 deletions(-) diff --git a/services/apps/nango_worker/src/activities/nangoActivities.ts b/services/apps/nango_worker/src/activities/nangoActivities.ts index 9ef10bc124..a3929241b9 100644 --- a/services/apps/nango_worker/src/activities/nangoActivities.ts +++ b/services/apps/nango_worker/src/activities/nangoActivities.ts @@ -6,7 +6,6 @@ import { addRepoToGitIntegration, fetchIntegrationById, findIntegrationDataForNangoWebhookProcessing, - removeGitHubRepoMapping, removeGithubNangoConnection, setGithubIntegrationSettingsOrgs, setNangoIntegrationCursor, @@ -470,15 +469,6 @@ export async function unmapGithubRepo(integrationId: string, repo: IGithubRepoDa ) const repoUrl = `https://github.com/${repo.owner}/${repo.repoName}` - // remove repo from githubRepos mapping - await removeGitHubRepoMapping( - dbStoreQx(svc.postgres.writer), - svc.redis, - integrationId, - repo.owner, - repo.repoName, - ) - // soft-delete from public.repositories const affected = await softDeleteRepositories( dbStoreQx(svc.postgres.writer), diff --git a/services/apps/nango_worker/src/workflows/syncGithubIntegration.ts b/services/apps/nango_worker/src/workflows/syncGithubIntegration.ts index 36a6e8de8c..d44a12ad06 100644 --- a/services/apps/nango_worker/src/workflows/syncGithubIntegration.ts +++ b/services/apps/nango_worker/src/workflows/syncGithubIntegration.ts @@ -21,7 +21,7 @@ export async function syncGithubIntegration(args: ISyncGithubIntegrationArgument // delete connection from integrations.settings.nangoMapping object await activity.removeGithubConnection(integrationId, repo.connectionId) - // delete githubRepos mapping + soft-delete from public.repositories + // delete from public.repositories await activity.unmapGithubRepo(integrationId, repo.repo) } diff --git a/services/libs/data-access-layer/src/integrations/index.ts b/services/libs/data-access-layer/src/integrations/index.ts index 069362864a..c1615965a4 100644 --- a/services/libs/data-access-layer/src/integrations/index.ts +++ b/services/libs/data-access-layer/src/integrations/index.ts @@ -487,30 +487,6 @@ export async function addGithubNangoConnection( ) } -export async function removeGitHubRepoMapping( - qx: QueryExecutor, - redisClient: RedisClient, - integrationId: string, - owner: string, - repoName: string, -): Promise { - await qx.result( - ` - update "githubRepos" - set "deletedAt" = now() - where "integrationId" = $(integrationId) - and lower(url) = lower($(repo)) - `, - { - integrationId, - repo: `https://github.com/${owner}/${repoName}`, - }, - ) - - const cache = new RedisCache('githubRepos', redisClient, log) - await cache.deleteAll() -} - export async function addGitHubRepoMapping( qx: QueryExecutor, integrationId: string, From ffc2dedf0ad4ab4f55a581e031068c991c7a5952 Mon Sep 17 00:00:00 2001 From: Mouad BANI Date: Thu, 22 Jan 2026 16:02:17 +0100 Subject: [PATCH 03/17] chore: deprecate mapGithubRepo --- .../src/activities/nangoActivities.ts | 14 -------- .../src/workflows/syncGithubIntegration.ts | 3 -- .../src/integrations/index.ts | 32 ------------------- 3 files changed, 49 deletions(-) diff --git a/services/apps/nango_worker/src/activities/nangoActivities.ts b/services/apps/nango_worker/src/activities/nangoActivities.ts index a3929241b9..3b822b0361 100644 --- a/services/apps/nango_worker/src/activities/nangoActivities.ts +++ b/services/apps/nango_worker/src/activities/nangoActivities.ts @@ -1,7 +1,6 @@ import { IS_DEV_ENV, IS_STAGING_ENV, singleOrDefault } from '@crowd/common' import { CommonIntegrationService, GithubIntegrationService } from '@crowd/common_services' import { - addGitHubRepoMapping, addGithubNangoConnection, addRepoToGitIntegration, fetchIntegrationById, @@ -449,19 +448,6 @@ export async function deleteConnection( await deleteNangoConnection(providerConfigKey as NangoIntegration, connectionId) } -export async function mapGithubRepo(integrationId: string, repo: IGithubRepoData): Promise { - svc.log.info( - { integrationId }, - `Adding github repo mapping for integration ${integrationId} and repo ${repo.owner}/${repo.repoName}!`, - ) - await addGitHubRepoMapping( - dbStoreQx(svc.postgres.writer), - integrationId, - repo.owner, - repo.repoName, - ) -} - export async function unmapGithubRepo(integrationId: string, repo: IGithubRepoData): Promise { svc.log.info( { integrationId }, diff --git a/services/apps/nango_worker/src/workflows/syncGithubIntegration.ts b/services/apps/nango_worker/src/workflows/syncGithubIntegration.ts index d44a12ad06..33164bc9c9 100644 --- a/services/apps/nango_worker/src/workflows/syncGithubIntegration.ts +++ b/services/apps/nango_worker/src/workflows/syncGithubIntegration.ts @@ -53,9 +53,6 @@ export async function syncGithubIntegration(args: ISyncGithubIntegrationArgument // add connection to integrations.settings.nangoMapping object await activity.setGithubConnection(integrationId, repo, connectionId) - // add repo to githubRepos mapping if it's not already mapped - await activity.mapGithubRepo(integrationId, repo) - // add repo to git integration await activity.updateGitIntegrationWithRepo(integrationId, repo) diff --git a/services/libs/data-access-layer/src/integrations/index.ts b/services/libs/data-access-layer/src/integrations/index.ts index c1615965a4..7678c8f346 100644 --- a/services/libs/data-access-layer/src/integrations/index.ts +++ b/services/libs/data-access-layer/src/integrations/index.ts @@ -487,38 +487,6 @@ export async function addGithubNangoConnection( ) } -export async function addGitHubRepoMapping( - qx: QueryExecutor, - integrationId: string, - owner: string, - repoName: string, -): Promise { - await qx.result( - ` - insert into "githubRepos"("tenantId", "integrationId", "segmentId", url) - values( - $(tenantId), - $(integrationId), - (select "segmentId" from integrations where id = $(integrationId) limit 1), - $(url) - ) - on conflict ("tenantId", url) do update - set - "deletedAt" = null, - "segmentId" = (select "segmentId" from integrations where id = $(integrationId) limit 1), - "integrationId" = $(integrationId), - "updatedAt" = now() - -- in case there is a row already only update it if it's deleted so deletedAt is not null - -- otherwise leave it as is - where "githubRepos"."deletedAt" is not null - `, - { - tenantId: DEFAULT_TENANT_ID, - integrationId, - url: `https://github.com/${owner}/${repoName}`, - }, - ) -} /** * Syncs repositories to git.repositories table (git-integration V2) From 8c40f39db39c631cb332de9131ddc04c60ba7824 Mon Sep 17 00:00:00 2001 From: Mouad BANI Date: Thu, 22 Jan 2026 16:06:27 +0100 Subject: [PATCH 04/17] chore: deprecate removePlainGitHubRepoMapping --- .../src/services/integration.service.ts | 2 -- .../src/integrations/index.ts | 22 ------------------- 2 files changed, 24 deletions(-) diff --git a/services/libs/common_services/src/services/integration.service.ts b/services/libs/common_services/src/services/integration.service.ts index 474db65b02..8eb0410cd1 100644 --- a/services/libs/common_services/src/services/integration.service.ts +++ b/services/libs/common_services/src/services/integration.service.ts @@ -10,7 +10,6 @@ import { fetchIntegrationById, findNangoRepositoriesToBeRemoved, findRepositoriesForSegment, - removePlainGitHubRepoMapping, removePlainGitlabRepoMapping, } from '@crowd/data-access-layer/src/integrations' import { QueryExecutor } from '@crowd/data-access-layer/src/queryExecutor' @@ -134,7 +133,6 @@ export class CommonIntegrationService { // Unmap repositories that should be removed for (const repo of reposToBeRemoved) { - await removePlainGitHubRepoMapping(qx, redis, integrationId, repo) await removePlainGitlabRepoMapping(qx, redis, integrationId, repo) } diff --git a/services/libs/data-access-layer/src/integrations/index.ts b/services/libs/data-access-layer/src/integrations/index.ts index 7678c8f346..f160f2d7eb 100644 --- a/services/libs/data-access-layer/src/integrations/index.ts +++ b/services/libs/data-access-layer/src/integrations/index.ts @@ -676,28 +676,6 @@ export async function addRepoToGitIntegration( ) } -export async function removePlainGitHubRepoMapping( - qx: QueryExecutor, - redisClient: RedisClient, - integrationId: string, - repo: string, -): Promise { - await qx.result( - ` - update "githubRepos" - set "deletedAt" = now() - where "integrationId" = $(integrationId) - and lower(url) = lower($(repo)) - `, - { - integrationId, - repo, - }, - ) - - const cache = new RedisCache('githubRepos', redisClient, log) - await cache.deleteAll() -} export async function removePlainGitlabRepoMapping( qx: QueryExecutor, From 6fe75f967e724fc3266e22a21db8ae2b5b18e585 Mon Sep 17 00:00:00 2001 From: Mouad BANI Date: Thu, 22 Jan 2026 16:09:05 +0100 Subject: [PATCH 05/17] chore: deprecate githubReposRepository --- .../repositories/githubReposRepository.ts | 129 ------------------ backend/src/services/integrationService.ts | 12 +- 2 files changed, 1 insertion(+), 140 deletions(-) delete mode 100644 backend/src/database/repositories/githubReposRepository.ts diff --git a/backend/src/database/repositories/githubReposRepository.ts b/backend/src/database/repositories/githubReposRepository.ts deleted file mode 100644 index 39ac7e4e75..0000000000 --- a/backend/src/database/repositories/githubReposRepository.ts +++ /dev/null @@ -1,129 +0,0 @@ -import trim from 'lodash/trim' -import { QueryTypes } from 'sequelize' - -import { DEFAULT_TENANT_ID, Error400 } from '@crowd/common' -import { RedisCache } from '@crowd/redis' - -import { IRepositoryOptions } from './IRepositoryOptions' -import SequelizeRepository from './sequelizeRepository' - -export default class GithubReposRepository { - private static getCache(options: IRepositoryOptions): RedisCache { - return new RedisCache('githubRepos', options.redis, options.log) - } - - private static async bulkInsert( - table, - columns, - placeholdersFn, - values, - options: IRepositoryOptions, - ) { - const transaction = SequelizeRepository.getTransaction(options) - const seq = SequelizeRepository.getSequelize(options) - - columns = columns.map((column) => trim(column, '"')).map((column) => `"${column}"`) - const joinedColumns = columns.join(', ') - - const placeholders = values.map((value, idx) => placeholdersFn(idx)) - - const replacements = values.reduce((acc, value) => { - Object.entries(value).forEach(([key, value]) => { - acc[key] = value - }) - return acc - }, {}) - - return seq.query( - ` - INSERT INTO "${table}" - (${joinedColumns}) - VALUES ${placeholders.join(', ')} - ON CONFLICT ("tenantId", "url") - DO UPDATE SET "segmentId" = EXCLUDED."segmentId", - "integrationId" = EXCLUDED."integrationId", - "deletedAt" = NULL - `, - { - replacements, - transaction, - }, - ) - } - - static async updateMapping(integrationId, mapping, options: IRepositoryOptions) { - const mappingEntries = Object.entries(mapping).map(([url, segmentId]) => ({ - url: url as string, - segmentId: segmentId as string, - })) - - const transaction = SequelizeRepository.getTransaction(options) - const seq = SequelizeRepository.getSequelize(options) - - const existingRows = await seq.query( - ` - select * from "githubRepos" where "tenantId" = :tenantId and "url" in (:urls) - `, - { - replacements: { - tenantId: DEFAULT_TENANT_ID, - urls: mappingEntries.map((e) => e.url), - }, - type: QueryTypes.SELECT, - transaction, - }, - ) - - for (const row of existingRows as any[]) { - if (!row.deletedAt && row.integrationId !== integrationId) { - options.log.warn( - `Trying to update github repo ${row.url} mapping with integrationId ${integrationId} but it is already mapped to integration ${row.integrationId}!`, - ) - throw new Error400( - options.language, - 'errors.integrations.repoAlreadyMapped', - row.url, - integrationId, - row.integrationId, - ) - } - } - - await GithubReposRepository.bulkInsert( - 'githubRepos', - ['tenantId', 'integrationId', 'segmentId', 'url'], - (idx) => `(:tenantId_${idx}, :integrationId_${idx}, :segmentId_${idx}, :url_${idx})`, - mappingEntries.map(({ url, segmentId }, idx) => ({ - [`tenantId_${idx}`]: DEFAULT_TENANT_ID, - [`integrationId_${idx}`]: integrationId, - [`segmentId_${idx}`]: segmentId, - [`url_${idx}`]: url, - })), - options, - ) - - await this.getCache(options).deleteAll() - } - - static async delete(integrationId, options: IRepositoryOptions) { - const seq = SequelizeRepository.getSequelize(options) - const transaction = SequelizeRepository.getTransaction(options) - - await seq.query( - ` - UPDATE "githubRepos" - SET "deletedAt" = NOW() - WHERE "integrationId" = :integrationId - `, - { - replacements: { - integrationId, - }, - type: QueryTypes.UPDATE, - transaction, - }, - ) - - await this.getCache(options).deleteAll() - } -} diff --git a/backend/src/services/integrationService.ts b/backend/src/services/integrationService.ts index 053e1970a6..f39a4d3a33 100644 --- a/backend/src/services/integrationService.ts +++ b/backend/src/services/integrationService.ts @@ -67,8 +67,7 @@ import { import { DISCORD_CONFIG, GITHUB_CONFIG, GITLAB_CONFIG, IS_TEST_ENV, KUBE_MODE } from '../conf/index' import GitReposRepository from '../database/repositories/gitReposRepository' -import GithubReposRepository from '../database/repositories/githubReposRepository' -import IntegrationRepository from '../database/repositories/integrationRepository' +import IntegrationRepository from '../database/repositories/integrationRepository' import SequelizeRepository from '../database/repositories/sequelizeRepository' import telemetryTrack from '../segment/telemetryTrack' import track from '../segment/track' @@ -505,12 +504,6 @@ export default class IntegrationService { integration.platform === PlatformType.GITHUB || integration.platform === PlatformType.GITHUB_NANGO ) { - // soft delete github repos from legacy table - await GithubReposRepository.delete(integration.id, { - ...this.options, - transaction, - }) - // Soft delete from public.repositories only repos owned by this GitHub integration // This preserves native Git repos that aren't mirrored from GitHub const qx = SequelizeRepository.getQueryExecutor({ @@ -1034,9 +1027,6 @@ export default class IntegrationService { transaction, } try { - this.options.log.info(`Updating GitHub repos mapping for integration ${integrationId}!`) - await GithubReposRepository.updateMapping(integrationId, mapping, txOptions) - // add the repos to the git integration const repos: Record = Object.entries(mapping).reduce( (acc, [url, segmentId]) => { From e983869ec4e5620a073529416a39abb3ed0a7db9 Mon Sep 17 00:00:00 2001 From: Mouad BANI Date: Thu, 22 Jan 2026 16:16:34 +0100 Subject: [PATCH 06/17] chore: deprecate git.repositories table --- .../repositories/gitReposRepository.ts | 102 --------------- backend/src/services/integrationService.ts | 15 --- .../src/integrations/index.ts | 123 ------------------ 3 files changed, 240 deletions(-) delete mode 100644 backend/src/database/repositories/gitReposRepository.ts diff --git a/backend/src/database/repositories/gitReposRepository.ts b/backend/src/database/repositories/gitReposRepository.ts deleted file mode 100644 index 16fabf355b..0000000000 --- a/backend/src/database/repositories/gitReposRepository.ts +++ /dev/null @@ -1,102 +0,0 @@ -import { QueryTypes } from 'sequelize' - -import { IRepositoryOptions } from './IRepositoryOptions' -import SequelizeRepository from './sequelizeRepository' - -export default class GitReposRepository { - /** - * Soft deletes repositories from git.repositories table - * @param integrationId The integration ID to delete repositories for - * @param options Repository options - */ - static async delete(integrationId: string, options: IRepositoryOptions) { - const seq = SequelizeRepository.getSequelize(options) - const transaction = SequelizeRepository.getTransaction(options) - - const [, affectedCount] = await seq.query( - ` - UPDATE git.repositories - SET "deletedAt" = NOW() - WHERE "integrationId" = :integrationId - AND "deletedAt" IS NULL - `, - { - replacements: { - integrationId, - }, - type: QueryTypes.UPDATE, - transaction, - }, - ) - - options.log.info(`Soft deleted ${affectedCount} repositories for integration ${integrationId}`) - - return affectedCount - } - - /** - * Upserts repositories into git.repositories table - * - * @param repositories - Array with id, url, integrationId, segmentId, forkedFrom (null preserves existing value) - * @param options - Repository options - */ - static async upsert( - repositories: Array<{ - id: string - url: string - integrationId: string - segmentId: string - forkedFrom?: string | null - }>, - options: IRepositoryOptions, - ) { - if (!repositories || repositories.length === 0) { - return - } - - const seq = SequelizeRepository.getSequelize(options) - const transaction = SequelizeRepository.getTransaction(options) - - // Build SQL placeholders and parameter replacements in a single loop - const placeholders: string[] = [] - const replacements: Record = {} - - repositories.forEach((repo, idx) => { - // Build placeholder for this repository - placeholders.push( - `(:id_${idx}, :url_${idx}, :integrationId_${idx}, :segmentId_${idx}, :forkedFrom_${idx})`, - ) - - // Build replacements for this repository - replacements[`id_${idx}`] = repo.id - replacements[`url_${idx}`] = repo.url - replacements[`integrationId_${idx}`] = repo.integrationId - replacements[`segmentId_${idx}`] = repo.segmentId - replacements[`forkedFrom_${idx}`] = repo.forkedFrom || null - }) - - const placeholdersString = placeholders.join(', ') - - await seq.query( - ` - INSERT INTO git.repositories (id, url, "integrationId", "segmentId", "forkedFrom") - VALUES ${placeholdersString} - ON CONFLICT (url) DO UPDATE SET - id = EXCLUDED.id, - "integrationId" = EXCLUDED."integrationId", - "segmentId" = EXCLUDED."segmentId", - "forkedFrom" = COALESCE(EXCLUDED."forkedFrom", git.repositories."forkedFrom"), - "updatedAt" = NOW(), - "deletedAt" = NULL, - state = 'pending', - priority = 1, - "lastProcessedAt" = NULL, - "lastProcessedCommit" = NULL - `, - { - replacements, - transaction, - }, - ) - } -} diff --git a/backend/src/services/integrationService.ts b/backend/src/services/integrationService.ts index f39a4d3a33..3d5def8278 100644 --- a/backend/src/services/integrationService.ts +++ b/backend/src/services/integrationService.ts @@ -8,7 +8,6 @@ import { QueryTypes, Transaction } from 'sequelize' import { EDITION, Error400, Error404, Error500, Error542, encryptData } from '@crowd/common' import { CommonIntegrationService, getGithubInstallationToken } from '@crowd/common_services' -import { syncRepositoriesToGitV2 } from '@crowd/data-access-layer' import { ICreateInsightsProject, deleteMissingSegmentRepositories, @@ -456,13 +455,6 @@ export default class IntegrationService { if (remainingRepos.length === 0) { // If no repos left, delete the Git integration entirely - // Soft delete git.repositories for git-integration V2 - await GitReposRepository.delete(gitIntegration.id, { - ...this.options, - transaction, - }) - - // Then delete the git integration await IntegrationRepository.destroy(gitIntegration.id, { ...this.options, transaction, @@ -543,11 +535,6 @@ export default class IntegrationService { ...this.options, transaction, }) - - await GitReposRepository.delete(integration.id, { - ...this.options, - transaction, - }) } // Soft delete from public.repositories for code integrations @@ -1504,8 +1491,6 @@ export default class IntegrationService { ) } - await syncRepositoriesToGitV2(qx, remotes, integration.id, currentSegmentId) - // sync to public.repositories (only for direct GIT connections, other platforms handle it themselves) if (!sourcePlatform) { const mapping = remotes.reduce( diff --git a/services/libs/data-access-layer/src/integrations/index.ts b/services/libs/data-access-layer/src/integrations/index.ts index f160f2d7eb..beee10b8f3 100644 --- a/services/libs/data-access-layer/src/integrations/index.ts +++ b/services/libs/data-access-layer/src/integrations/index.ts @@ -488,122 +488,6 @@ export async function addGithubNangoConnection( } -/** - * Syncs repositories to git.repositories table (git-integration V2) - * - * Finds existing repository IDs from githubRepos or gitlabRepos tables, - * or generates new UUIDs, then upserts to git.repositories table. - * - * @param qx - Query executor - * @param remotes - Array of repository objects with url and optional forkedFrom - * @param gitIntegrationId - The git integration ID - * @param segmentId - The segment ID for the repositories - */ -export async function syncRepositoriesToGitV2( - qx: QueryExecutor, - remotes: Array<{ url: string; forkedFrom?: string | null }>, - gitIntegrationId: string, - segmentId: string, -): Promise { - if (!remotes || remotes.length === 0) { - log.warn('No remotes provided to syncRepositoriesToGitV2') - return - } - - const urls = remotes.map((r) => r.url) - - // Check GitHub repos, GitLab repos, AND git.repositories for existing IDs - // Include soft-deleted repos to reuse their IDs on reconnect - const existingRepos: Array<{ - id: string - url: string - }> = await qx.select( - ` - WITH github_repos AS ( - SELECT id, url FROM "githubRepos" - WHERE url IN ($(urls:csv)) - ), - gitlab_repos AS ( - SELECT id, url FROM "gitlabRepos" - WHERE url IN ($(urls:csv)) - ), - git_repos AS ( - SELECT id, url FROM git.repositories - WHERE url IN ($(urls:csv)) - ) - SELECT DISTINCT ON (url) id, url FROM ( - SELECT id, url FROM github_repos - UNION ALL - SELECT id, url FROM gitlab_repos - UNION ALL - SELECT id, url FROM git_repos - ) combined - `, - { urls }, - ) - - // Create a map of existing url to id - const existingUrlToId = new Map(existingRepos.map((r) => [r.url, r.id])) - - // Build repositoriesToSync, using existing IDs where available - const repositoriesToSync: Array<{ - id: string - url: string - integrationId: string - segmentId: string - forkedFrom?: string | null - }> = remotes.map((remote) => { - const existingId = existingUrlToId.get(remote.url) - return { - id: existingId || generateUUIDv4(), - url: remote.url, - integrationId: gitIntegrationId, - segmentId, - forkedFrom: remote.forkedFrom || null, - } - }) - - if (existingRepos.length === 0) { - log.warn( - 'No existing repos found in githubRepos, gitlabRepos, or git.repositories - inserting new with generated UUIDs', - ) - } - - // Build SQL placeholders and parameters - const placeholders: string[] = [] - const params: Record = {} - - repositoriesToSync.forEach((repo, idx) => { - placeholders.push( - `($(id_${idx}), $(url_${idx}), $(integrationId_${idx}), $(segmentId_${idx}), $(forkedFrom_${idx}))`, - ) - params[`id_${idx}`] = repo.id - params[`url_${idx}`] = repo.url - params[`integrationId_${idx}`] = repo.integrationId - params[`segmentId_${idx}`] = repo.segmentId - params[`forkedFrom_${idx}`] = repo.forkedFrom || null - }) - - const placeholdersString = placeholders.join(', ') - - // Upsert to git.repositories - await qx.result( - ` - INSERT INTO git.repositories (id, url, "integrationId", "segmentId", "forkedFrom") - VALUES ${placeholdersString} - ON CONFLICT (id) DO UPDATE SET - "integrationId" = EXCLUDED."integrationId", - "segmentId" = EXCLUDED."segmentId", - "forkedFrom" = COALESCE(EXCLUDED."forkedFrom", git.repositories."forkedFrom"), - "updatedAt" = NOW(), - "deletedAt" = NULL - `, - params, - ) - - log.info(`Synced ${repositoriesToSync.length} repos to git.repositories`) -} - export async function addRepoToGitIntegration( qx: QueryExecutor, integrationId: string, @@ -667,13 +551,6 @@ export async function addRepoToGitIntegration( log.info({ integrationId: gitIntegration.id, repoUrl }, 'Added repo to git integration settings!') - // Also sync to git.repositories table (git-integration V2) - await syncRepositoriesToGitV2( - qx, - [{ url: repoUrl, forkedFrom }], - gitIntegration.id, - githubIntegration.segmentId, - ) } From db37ca4827294c53ffdeda77c1d0a39c0ae13e4a Mon Sep 17 00:00:00 2001 From: Mouad BANI Date: Thu, 22 Jan 2026 16:56:59 +0100 Subject: [PATCH 07/17] chore: deprecate git.repositories table --- backend/src/services/integrationService.ts | 84 ++----------------- .../src/activities/nangoActivities.ts | 12 +-- .../src/repositories/index.ts | 26 ------ 3 files changed, 10 insertions(+), 112 deletions(-) diff --git a/backend/src/services/integrationService.ts b/backend/src/services/integrationService.ts index 3d5def8278..dd269be83e 100644 --- a/backend/src/services/integrationService.ts +++ b/backend/src/services/integrationService.ts @@ -5,7 +5,7 @@ import axios, { AxiosRequestConfig, AxiosResponse } from 'axios' import lodash from 'lodash' import moment from 'moment' import { QueryTypes, Transaction } from 'sequelize' - +import { generateUUIDv4 as uuid } from '@crowd/common' import { EDITION, Error400, Error404, Error500, Error542, encryptData } from '@crowd/common' import { CommonIntegrationService, getGithubInstallationToken } from '@crowd/common_services' import { @@ -19,7 +19,6 @@ import { ICreateRepository, IRepository, IRepositoryMapping, - getGitRepositoryIdsByUrl, getIntegrationReposMapping, getRepositoriesBySourceIntegrationId, getRepositoriesByUrl, @@ -65,7 +64,6 @@ import { } from '@/serverless/integrations/usecases/groupsio/types' import { DISCORD_CONFIG, GITHUB_CONFIG, GITLAB_CONFIG, IS_TEST_ENV, KUBE_MODE } from '../conf/index' -import GitReposRepository from '../database/repositories/gitReposRepository' import IntegrationRepository from '../database/repositories/integrationRepository' import SequelizeRepository from '../database/repositories/sequelizeRepository' import telemetryTrack from '../segment/telemetryTrack' @@ -460,26 +458,6 @@ export default class IntegrationService { transaction, }) } else { - // Soft delete from git.repositories only the repos owned by the deleted integration - const urlsToRemove = allGitRepos - .filter((repo) => repo.sourceIntegrationId === id) - .map((r) => r.url) - - if (urlsToRemove.length > 0) { - await qxForGit.result( - ` - UPDATE git.repositories - SET "deletedAt" = NOW() - WHERE url IN ($(urlsToRemove:csv)) - AND "deletedAt" IS NULL - `, - { urlsToRemove }, - ) - this.options.log.info( - `Soft deleted ${urlsToRemove.length} repos from git.repositories for integration ${id}`, - ) - } - // Update git integration settings with remaining remotes const remainingRemotes = remainingRepos.map((r) => r.url) await this.gitConnectOrUpdate( @@ -529,7 +507,6 @@ export default class IntegrationService { }) } - // Soft delete git.repositories for git integration if (integration.platform === PlatformType.GIT) { await this.validateGitIntegrationDeletion(integration.id, { ...this.options, @@ -1366,7 +1343,7 @@ export default class IntegrationService { } /** - * Adds/updates Git integration and syncs repositories to git.repositories table (git-integration V2) + * Adds/updates Git integration and syncs repositories to repositories table * * @param integrationData.remotes - Repository objects with url and optional forkedFrom (parent repo URL). * If forkedFrom is null, existing DB value is preserved. @@ -1425,7 +1402,7 @@ export default class IntegrationService { const existingRows = await seq.query( ` - SELECT url, "integrationId" FROM git.repositories + SELECT url, "sourceIntegrationId" AS "integrationId" FROM repositories WHERE url IN (:urls) AND "deletedAt" IS NULL `, { @@ -1451,46 +1428,12 @@ export default class IntegrationService { } } - // upsert repositories to git.repositories in order to be processed by git-integration V2 const currentSegmentId = (options || this.options).currentSegments[0].id const qx = SequelizeRepository.getQueryExecutor({ ...(options || this.options), transaction, }) - // Soft-delete repos from git.repositories that are no longer in the remotes list - // Only delete repos owned by this Git integration (not mirrored from other integrations) - const newRemoteUrls = new Set(remotes.map((r) => r.url)) - const existingOwnedRepos: Array<{ url: string }> = await qx.select( - ` - SELECT gr.url - FROM git.repositories gr - JOIN public.repositories pr ON pr.url = gr.url AND pr."deletedAt" IS NULL - WHERE gr."integrationId" = $(integrationId) - AND gr."deletedAt" IS NULL - AND pr."sourceIntegrationId" = $(integrationId) - `, - { integrationId: integration.id }, - ) - const urlsToDelete = existingOwnedRepos - .map((r) => r.url) - .filter((url) => !newRemoteUrls.has(url)) - - if (urlsToDelete.length > 0) { - await qx.result( - ` - UPDATE git.repositories - SET "deletedAt" = NOW() - WHERE url IN ($(urlsToDelete:csv)) - AND "deletedAt" IS NULL - `, - { urlsToDelete }, - ) - this.options.log.info( - `Soft-deleted ${urlsToDelete.length} owned repos from git.repositories`, - ) - } - // sync to public.repositories (only for direct GIT connections, other platforms handle it themselves) if (!sourcePlatform) { const mapping = remotes.reduce( @@ -3233,16 +3176,6 @@ export default class IntegrationService { const segmentIds = [...new Set(urls.map((url) => mapping[url]))] - const isGitHubPlatform = [PlatformType.GITHUB, PlatformType.GITHUB_NANGO].includes( - sourcePlatform, - ) - - const [gitRepoIdMap, sourceIntegration] = await Promise.all([ - // TODO: after migration, generate UUIDs instead of fetching from git.repositories - getGitRepositoryIdsByUrl(qx, urls), - isGitHubPlatform ? IntegrationRepository.findById(sourceIntegrationId, txOptions) : null, - ]) - const collectionService = new CollectionService(txOptions) const insightsProjectMap = new Map() const gitIntegrationMap = new Map() @@ -3281,6 +3214,10 @@ export default class IntegrationService { // Build forkedFrom map from integration settings (for GITHUB repositories) const forkedFromMap = new Map() + const isGitHubPlatform = [PlatformType.GITHUB, PlatformType.GITHUB_NANGO].includes( + sourcePlatform, + ) + const sourceIntegration = isGitHubPlatform ? IntegrationRepository.findById(sourceIntegrationId, txOptions) : null if (sourceIntegration?.settings?.orgs) { const allRepos = sourceIntegration.settings.orgs.flatMap((org: any) => org.repos || []) for (const repo of allRepos) { @@ -3294,15 +3231,10 @@ export default class IntegrationService { const payloads: ICreateRepository[] = [] for (const url of urls) { const segmentId = mapping[url] - const id = gitRepoIdMap.get(url) + const id = uuid() const insightsProjectId = insightsProjectMap.get(segmentId) const gitIntegrationId = gitIntegrationMap.get(segmentId) - if (!id) { - // TODO: post migration generate id and remove lookup - this.options.log.warn(`No git.repositories ID found for URL ${url}`) - throw new Error500('Repo not found in git.repositories') - } payloads.push({ id, diff --git a/services/apps/nango_worker/src/activities/nangoActivities.ts b/services/apps/nango_worker/src/activities/nangoActivities.ts index 3b822b0361..cd3b302c5d 100644 --- a/services/apps/nango_worker/src/activities/nangoActivities.ts +++ b/services/apps/nango_worker/src/activities/nangoActivities.ts @@ -9,6 +9,7 @@ import { setGithubIntegrationSettingsOrgs, setNangoIntegrationCursor, } from '@crowd/data-access-layer/src/integrations' +import { generateUUIDv4 as uuid } from '@crowd/common' import IntegrationStreamRepository from '@crowd/data-access-layer/src/old/apps/integration_stream_worker/integrationStream.repo' import { dbStoreQx } from '@crowd/data-access-layer/src/queryExecutor' import { softDeleteRepositories, upsertRepository } from '@crowd/data-access-layer/src/repositories' @@ -548,18 +549,9 @@ export async function mapGithubRepoToRepositories( throw new Error(`Insights project not found for segment ${githubIntegration.segmentId}!`) } - // TODO: Post migration, generate UUID instead of fetching from git.repositories - const gitRepo = await qx.selectOneOrNone( - `SELECT id FROM git.repositories WHERE url = $(url) AND "deletedAt" IS NULL`, - { url: repoUrl }, - ) - if (!gitRepo) { - throw new Error(`Repository ${repoUrl} not found in git.repositories!`) - } - try { const result = await upsertRepository(qx, { - id: gitRepo.id, + id: uuid(), url: repoUrl, segmentId: githubIntegration.segmentId, gitIntegrationId: gitIntegration.id, diff --git a/services/libs/data-access-layer/src/repositories/index.ts b/services/libs/data-access-layer/src/repositories/index.ts index 64e6d64eba..afececd502 100644 --- a/services/libs/data-access-layer/src/repositories/index.ts +++ b/services/libs/data-access-layer/src/repositories/index.ts @@ -156,32 +156,6 @@ export async function getRepositoriesBySourceIntegrationId( ) } -/** - * Get git repository IDs by URLs from git.repositories table - * @param qx - Query executor - * @param urls - Array of repository URLs - * @returns Map of URL to repository ID - */ -export async function getGitRepositoryIdsByUrl( - qx: QueryExecutor, - urls: string[], -): Promise> { - if (urls.length === 0) { - return new Map() - } - - const results = await qx.select( - ` - SELECT id, url - FROM git.repositories - WHERE url IN ($(urls:csv)) - `, - { urls }, - ) - - return new Map(results.map((row: { id: string; url: string }) => [row.url, row.id])) -} - /** * Get repositories by their URLs * @param qx - Query executor From ca910e91c05d1e6262dc27f90e43584bb3ef4156 Mon Sep 17 00:00:00 2001 From: Mouad BANI Date: Thu, 22 Jan 2026 17:01:01 +0100 Subject: [PATCH 08/17] chore: change git.SE repo reference to public.repositories --- ...__changeGitServiceExecutionRefreneceToRepositories.sql | 0 ...__changeGitServiceExecutionRefreneceToRepositories.sql | 8 ++++++++ 2 files changed, 8 insertions(+) create mode 100644 backend/src/database/migrations/U1769097489__changeGitServiceExecutionRefreneceToRepositories.sql create mode 100644 backend/src/database/migrations/V1769097489__changeGitServiceExecutionRefreneceToRepositories.sql diff --git a/backend/src/database/migrations/U1769097489__changeGitServiceExecutionRefreneceToRepositories.sql b/backend/src/database/migrations/U1769097489__changeGitServiceExecutionRefreneceToRepositories.sql new file mode 100644 index 0000000000..e69de29bb2 diff --git a/backend/src/database/migrations/V1769097489__changeGitServiceExecutionRefreneceToRepositories.sql b/backend/src/database/migrations/V1769097489__changeGitServiceExecutionRefreneceToRepositories.sql new file mode 100644 index 0000000000..510b6fbd5a --- /dev/null +++ b/backend/src/database/migrations/V1769097489__changeGitServiceExecutionRefreneceToRepositories.sql @@ -0,0 +1,8 @@ +-- Drop the existing foreign key constraint to git.repositories +ALTER TABLE git."serviceExecutions" + DROP CONSTRAINT "serviceExecutions_repoId_fkey"; + +-- Add new foreign key constraint to public.repositories +ALTER TABLE git."serviceExecutions" + ADD CONSTRAINT "serviceExecutions_repoId_fkey" + FOREIGN KEY ("repoId") REFERENCES public.repositories(id) ON DELETE CASCADE; From 3c5f932d802dc383c94a8fa55791a93b6e4f595c Mon Sep 17 00:00:00 2001 From: Mouad BANI Date: Thu, 22 Jan 2026 17:08:07 +0100 Subject: [PATCH 09/17] chore: deprecate gitlabRepos --- .../repositories/gitlabReposRepository.ts | 120 ------------------ backend/src/services/integrationService.ts | 23 +--- .../src/integrations/index.ts | 9 +- 3 files changed, 11 insertions(+), 141 deletions(-) delete mode 100644 backend/src/database/repositories/gitlabReposRepository.ts diff --git a/backend/src/database/repositories/gitlabReposRepository.ts b/backend/src/database/repositories/gitlabReposRepository.ts deleted file mode 100644 index 8554be315a..0000000000 --- a/backend/src/database/repositories/gitlabReposRepository.ts +++ /dev/null @@ -1,120 +0,0 @@ -import trim from 'lodash/trim' -import { QueryTypes } from 'sequelize' - -import { DEFAULT_TENANT_ID, Error400 } from '@crowd/common' -import { RedisCache } from '@crowd/redis' - -import { IRepositoryOptions } from './IRepositoryOptions' -import SequelizeRepository from './sequelizeRepository' - -export default class GitlabReposRepository { - private static getCache(options: IRepositoryOptions): RedisCache { - return new RedisCache('gitlabRepos', options.redis, options.log) - } - - private static async bulkInsert( - table, - columns, - placeholdersFn, - values, - options: IRepositoryOptions, - ) { - const transaction = SequelizeRepository.getTransaction(options) - const seq = SequelizeRepository.getSequelize(options) - - columns = columns.map((column) => trim(column, '"')).map((column) => `"${column}"`) - const joinedColumns = columns.join(', ') - - const placeholders = values.map((value, idx) => placeholdersFn(idx)) - - const replacements = values.reduce((acc, value) => { - Object.entries(value).forEach(([key, value]) => { - acc[key] = value - }) - return acc - }, {}) - - return seq.query( - ` - INSERT INTO "${table}" - (${joinedColumns}) - VALUES ${placeholders.join(', ')} - ON CONFLICT ("tenantId", "url") - DO UPDATE SET "segmentId" = EXCLUDED."segmentId", - "integrationId" = EXCLUDED."integrationId" - `, - { - replacements, - transaction, - }, - ) - } - - static async updateMapping(integrationId, mapping, options: IRepositoryOptions) { - const transaction = SequelizeRepository.getTransaction(options) - - // Check for repositories already mapped to other integrations - for (const url of Object.keys(mapping)) { - const existingRows = await options.database.sequelize.query( - `SELECT * FROM "gitlabRepos" WHERE url = :url AND "deletedAt" IS NULL`, - { - replacements: { url }, - type: QueryTypes.SELECT, - transaction, - }, - ) - - for (const row of existingRows as any[]) { - if (!row.deletedAt && row.integrationId !== integrationId) { - options.log.warn( - `Trying to update gitlab repo ${row.url} mapping with integrationId ${integrationId} but it is already mapped to integration ${row.integrationId}!`, - ) - - throw new Error400( - options.language, - 'errors.integrations.repoAlreadyMapped', - row.url, - integrationId, - row.integrationId, - ) - } - } - } - await GitlabReposRepository.bulkInsert( - 'gitlabRepos', - ['tenantId', 'integrationId', 'segmentId', 'url'], - (idx) => `(:tenantId_${idx}, :integrationId_${idx}, :segmentId_${idx}, :url_${idx})`, - Object.entries(mapping).map(([url, segmentId], idx) => ({ - [`tenantId_${idx}`]: DEFAULT_TENANT_ID, - [`integrationId_${idx}`]: integrationId, - [`segmentId_${idx}`]: segmentId, - [`url_${idx}`]: url, - })), - options, - ) - - await this.getCache(options).deleteAll() - } - - static async delete(integrationId, options: IRepositoryOptions) { - const seq = SequelizeRepository.getSequelize(options) - const transaction = SequelizeRepository.getTransaction(options) - - await seq.query( - ` - UPDATE "gitlabRepos" - SET "deletedAt" = NOW() - WHERE "integrationId" = :integrationId - `, - { - replacements: { - integrationId, - }, - type: QueryTypes.UPDATE, - transaction, - }, - ) - - await this.getCache(options).deleteAll() - } -} diff --git a/backend/src/services/integrationService.ts b/backend/src/services/integrationService.ts index dd269be83e..ee7e7efdb6 100644 --- a/backend/src/services/integrationService.ts +++ b/backend/src/services/integrationService.ts @@ -45,7 +45,6 @@ import { CodePlatform, Edition, PlatformType } from '@crowd/types' import { IRepositoryOptions } from '@/database/repositories/IRepositoryOptions' import GithubInstallationsRepository from '@/database/repositories/githubInstallationsRepository' -import GitlabReposRepository from '@/database/repositories/gitlabReposRepository' import IntegrationProgressRepository from '@/database/repositories/integrationProgressRepository' import SegmentRepository from '@/database/repositories/segmentRepository' import { IntegrationProgress, Repos } from '@/serverless/integrations/types/regularTypes' @@ -491,20 +490,12 @@ export default class IntegrationService { } } - if (integration.platform === PlatformType.GITLAB) { - if (integration.settings.webhooks) { - await removeGitlabWebhooks( - integration.token, - integration.settings.webhooks.map((hook) => hook.projectId), - integration.settings.webhooks.map((hook) => hook.hookId), - ) - } - - // soft delete gitlab repos - await GitlabReposRepository.delete(integration.id, { - ...this.options, - transaction, - }) + if (integration.platform === PlatformType.GITLAB && integration.settings.webhooks) { + await removeGitlabWebhooks( + integration.token, + integration.settings.webhooks.map((hook) => hook.projectId), + integration.settings.webhooks.map((hook) => hook.hookId), + ) } if (integration.platform === PlatformType.GIT) { @@ -2880,8 +2871,6 @@ export default class IntegrationService { } try { - await GitlabReposRepository.updateMapping(integrationId, mapping, txOptions) - // add the repos to the git integration if (EDITION === Edition.LFX) { const repos: Record = Object.entries(mapping).reduce( diff --git a/services/libs/data-access-layer/src/integrations/index.ts b/services/libs/data-access-layer/src/integrations/index.ts index beee10b8f3..84e2e23551 100644 --- a/services/libs/data-access-layer/src/integrations/index.ts +++ b/services/libs/data-access-layer/src/integrations/index.ts @@ -562,10 +562,11 @@ export async function removePlainGitlabRepoMapping( ): Promise { await qx.result( ` - update "gitlabRepos" - set "deletedAt" = now() - where "integrationId" = $(integrationId) + update public.repositories + set "deletedAt" = now(), "updatedAt" = now() + where "sourceIntegrationId" = $(integrationId) and lower(url) = lower($(repo)) + and "deletedAt" is null `, { integrationId, @@ -573,7 +574,7 @@ export async function removePlainGitlabRepoMapping( }, ) - const cache = new RedisCache('gitlabRepos', redisClient, log) + const cache = new RedisCache('repoSegmentLookup', redisClient, log) await cache.deleteAll() } From f25e61d8b44f5e7a8a8e88d11fac6dc22c1c2bdb Mon Sep 17 00:00:00 2001 From: Mouad BANI Date: Thu, 22 Jan 2026 17:22:05 +0100 Subject: [PATCH 10/17] chore: fix lint & format --- services/apps/nango_worker/src/activities.ts | 2 -- services/apps/nango_worker/src/activities/nangoActivities.ts | 5 ++--- services/libs/data-access-layer/src/integrations/index.ts | 5 ----- 3 files changed, 2 insertions(+), 10 deletions(-) diff --git a/services/apps/nango_worker/src/activities.ts b/services/apps/nango_worker/src/activities.ts index 55aad296de..73171ee9ef 100644 --- a/services/apps/nango_worker/src/activities.ts +++ b/services/apps/nango_worker/src/activities.ts @@ -4,7 +4,6 @@ import { createGithubConnection, deleteConnection, logInfo, - mapGithubRepo, mapGithubRepoToRepositories, processNangoWebhook, removeGithubConnection, @@ -23,7 +22,6 @@ export { removeGithubConnection, setGithubConnection, startNangoSync, - mapGithubRepo, mapGithubRepoToRepositories, unmapGithubRepo, canCreateGithubConnection, diff --git a/services/apps/nango_worker/src/activities/nangoActivities.ts b/services/apps/nango_worker/src/activities/nangoActivities.ts index cd3b302c5d..c055b2f3e5 100644 --- a/services/apps/nango_worker/src/activities/nangoActivities.ts +++ b/services/apps/nango_worker/src/activities/nangoActivities.ts @@ -1,4 +1,5 @@ import { IS_DEV_ENV, IS_STAGING_ENV, singleOrDefault } from '@crowd/common' +import { generateUUIDv4 as uuid } from '@crowd/common' import { CommonIntegrationService, GithubIntegrationService } from '@crowd/common_services' import { addGithubNangoConnection, @@ -9,7 +10,6 @@ import { setGithubIntegrationSettingsOrgs, setNangoIntegrationCursor, } from '@crowd/data-access-layer/src/integrations' -import { generateUUIDv4 as uuid } from '@crowd/common' import IntegrationStreamRepository from '@crowd/data-access-layer/src/old/apps/integration_stream_worker/integrationStream.repo' import { dbStoreQx } from '@crowd/data-access-layer/src/queryExecutor' import { softDeleteRepositories, upsertRepository } from '@crowd/data-access-layer/src/repositories' @@ -477,8 +477,7 @@ export async function updateGitIntegrationWithRepo( `Updating git integration with repo ${repo.owner}/${repo.repoName} for integration ${integrationId}!`, ) const repoUrl = `https://github.com/${repo.owner}/${repo.repoName}` - const forkedFrom = await GithubIntegrationService.getForkedFrom(repo.owner, repo.repoName) - await addRepoToGitIntegration(dbStoreQx(svc.postgres.writer), integrationId, repoUrl, forkedFrom) + await addRepoToGitIntegration(dbStoreQx(svc.postgres.writer), integrationId, repoUrl) } function parseGithubUrl(url: string): IGithubRepoData { diff --git a/services/libs/data-access-layer/src/integrations/index.ts b/services/libs/data-access-layer/src/integrations/index.ts index 84e2e23551..0ae01a72e2 100644 --- a/services/libs/data-access-layer/src/integrations/index.ts +++ b/services/libs/data-access-layer/src/integrations/index.ts @@ -1,4 +1,3 @@ -import { DEFAULT_TENANT_ID, generateUUIDv4 } from '@crowd/common' import { getServiceChildLogger } from '@crowd/logging' import { RedisCache, RedisClient } from '@crowd/redis' import { IIntegration, PlatformType } from '@crowd/types' @@ -487,12 +486,10 @@ export async function addGithubNangoConnection( ) } - export async function addRepoToGitIntegration( qx: QueryExecutor, integrationId: string, repoUrl: string, - forkedFrom: string | null, ): Promise { // Get the github integration to find its segmentId const githubIntegration = await qx.selectOneOrNone( @@ -550,10 +547,8 @@ export async function addRepoToGitIntegration( ) log.info({ integrationId: gitIntegration.id, repoUrl }, 'Added repo to git integration settings!') - } - export async function removePlainGitlabRepoMapping( qx: QueryExecutor, redisClient: RedisClient, From de5f592e495a1e2dc761443a48d423e8132facc2 Mon Sep 17 00:00:00 2001 From: Mouad BANI Date: Fri, 23 Jan 2026 11:01:01 +0100 Subject: [PATCH 11/17] chore: deprecate segmentRepositories --- backend/src/services/integrationService.ts | 50 ++----------- .../src/services/integration.service.ts | 18 +---- .../src/collections/index.ts | 70 ------------------- 3 files changed, 6 insertions(+), 132 deletions(-) diff --git a/backend/src/services/integrationService.ts b/backend/src/services/integrationService.ts index ee7e7efdb6..4f94abcf08 100644 --- a/backend/src/services/integrationService.ts +++ b/backend/src/services/integrationService.ts @@ -8,12 +8,7 @@ import { QueryTypes, Transaction } from 'sequelize' import { generateUUIDv4 as uuid } from '@crowd/common' import { EDITION, Error400, Error404, Error500, Error542, encryptData } from '@crowd/common' import { CommonIntegrationService, getGithubInstallationToken } from '@crowd/common_services' -import { - ICreateInsightsProject, - deleteMissingSegmentRepositories, - deleteSegmentRepositories, - upsertSegmentRepositories, -} from '@crowd/data-access-layer/src/collections' +import { ICreateInsightsProject } from '@crowd/data-access-layer/src/collections' import { findRepositoriesForSegment } from '@crowd/data-access-layer/src/integrations' import { ICreateRepository, @@ -530,9 +525,7 @@ export default class IntegrationService { insightsProject = project const widgetsResult = await collectionService.findSegmentsWidgetsById(segmentId) widgets = widgetsResult.widgets - await deleteSegmentRepositories(qx, { - segmentId, - }) + // Note: Repos are soft-deleted in public.repositories via mapUnifiedRepositories above } const insightsRepo = insightsProject?.repositories ?? [] @@ -994,26 +987,9 @@ export default class IntegrationService { {}, ) - const qx = SequelizeRepository.getQueryExecutor(txOptions) const collectionService = new CollectionService(txOptions) - for (const [segmentId, repositories] of Object.entries(repos)) { - this.options.log.info(`Finding insights project for segment ${segmentId}!`) - const [insightsProject] = await collectionService.findInsightsProjectsBySegmentId(segmentId) - - if (insightsProject) { - this.options.log.info(`Upserting segment repositories for segment ${segmentId}!`) - await upsertSegmentRepositories(qx, { - insightsProjectId: insightsProject.id, - repositories, - segmentId, - }) - await deleteMissingSegmentRepositories(qx, { - repositories, - segmentId, - }) - } - } + // Note: Repos are written to public.repositories via gitConnectOrUpdate -> mapUnifiedRepositories // Get integration settings to access forkedFrom data from all orgs const integration = await IntegrationRepository.findById(integrationId, txOptions) @@ -2884,25 +2860,7 @@ export default class IntegrationService { {}, ) - const qx = SequelizeRepository.getQueryExecutor(txOptions) - const collectionService = new CollectionService(txOptions) - - for (const [segmentId, repositories] of Object.entries(repos)) { - const [insightsProject] = - await collectionService.findInsightsProjectsBySegmentId(segmentId) - - if (insightsProject) { - await upsertSegmentRepositories(qx, { - insightsProjectId: insightsProject.id, - repositories, - segmentId, - }) - await deleteMissingSegmentRepositories(qx, { - repositories, - segmentId, - }) - } - } + // Note: Repos are written to public.repositories via mapUnifiedRepositories below for (const [segmentId, urls] of Object.entries(repos)) { let isGitintegrationConfigured diff --git a/services/libs/common_services/src/services/integration.service.ts b/services/libs/common_services/src/services/integration.service.ts index 8eb0410cd1..3ab6ddaed4 100644 --- a/services/libs/common_services/src/services/integration.service.ts +++ b/services/libs/common_services/src/services/integration.service.ts @@ -1,10 +1,8 @@ import { decryptData } from '@crowd/common' import { InsightsProjectField, - deleteMissingSegmentRepositories, queryInsightsProjects, updateInsightsProject, - upsertSegmentRepositories, } from '@crowd/data-access-layer/src/collections' import { fetchIntegrationById, @@ -75,7 +73,7 @@ export class CommonIntegrationService { } /** - * Syncs GitHub repositories to segmentRepositories table and updates insightsProject.repositories + * Syncs GitHub repositories to insightsProject.repositories field * @param qx - Query executor for database operations * @param redis - Redis client for cache invalidation * @param integrationId - The integration ID to sync repositories for @@ -141,20 +139,8 @@ export class CommonIntegrationService { (url) => !reposToBeRemoved.includes(url) && !alreadyMappedRepos.includes(url), ) - // Upsert repositories to segmentRepositories table - await upsertSegmentRepositories(qx, { - insightsProjectId, - repositories, - segmentId, - }) - - // Delete missing repositories from segmentRepositories table - await deleteMissingSegmentRepositories(qx, { - repositories, - segmentId, - }) - // Update insightsProject.repositories field (this also sets updatedAt automatically) + // Note: Writes to public.repositories happen earlier via mapGithubRepoToRepositories() await updateInsightsProject(qx, insightsProjectId, { repositories, }) diff --git a/services/libs/data-access-layer/src/collections/index.ts b/services/libs/data-access-layer/src/collections/index.ts index 0dfbbb9be3..34fb8cc45b 100644 --- a/services/libs/data-access-layer/src/collections/index.ts +++ b/services/libs/data-access-layer/src/collections/index.ts @@ -329,73 +329,3 @@ export async function findBySlug(qx: QueryExecutor, slug: string) { }) return collections } - -export async function upsertSegmentRepositories( - qx: QueryExecutor, - { - insightsProjectId, - repositories, - segmentId, - }: { - insightsProjectId: string - repositories: string[] - segmentId: string - }, -) { - if (repositories.length === 0) { - return - } - - return qx.result( - ` - WITH "input" AS ( - SELECT DISTINCT unnest(ARRAY[$(repositories:csv)]::text[]) AS "repository" - ) - INSERT INTO "segmentRepositories" ("repository", "segmentId", "insightsProjectId") - SELECT "repository", $(segmentId), $(insightsProjectId) - FROM "input" - ON CONFLICT ("repository") - DO UPDATE SET - "segmentId" = EXCLUDED."segmentId", - "insightsProjectId" = EXCLUDED."insightsProjectId"; - `, - { insightsProjectId, repositories, segmentId }, - ) -} - -export async function deleteSegmentRepositories( - qx: QueryExecutor, - { - segmentId, - }: { - segmentId: string - }, -) { - return qx.result( - ` - DELETE FROM "segmentRepositories" - WHERE "segmentId" = '${segmentId}' - `, - { segmentId }, - ) -} - -export async function deleteMissingSegmentRepositories( - qx: QueryExecutor, - { - segmentId, - repositories, - }: { - segmentId: string - repositories: string[] - }, -) { - return qx.result( - ` - DELETE FROM "segmentRepositories" - WHERE "segmentId" = '${segmentId}' - AND ${repositories.length > 0 ? `"repository" != ALL(ARRAY[${repositories.map((repo) => `'${repo}'`).join(', ')}])` : 'TRUE'}; - `, - { segmentId, repositories }, - ) -} From 9f56955027c0560df5fb421f6074fca6aa9faa47 Mon Sep 17 00:00:00 2001 From: Mouad BANI Date: Fri, 23 Jan 2026 11:10:45 +0100 Subject: [PATCH 12/17] chore: deprecate segmentRepositories from archived_repo checker --- .../archived_repositories/src/database.ts | 32 +++++++------------ 1 file changed, 12 insertions(+), 20 deletions(-) diff --git a/services/cronjobs/archived_repositories/src/database.ts b/services/cronjobs/archived_repositories/src/database.ts index 2c6c6d0815..4c8e62af79 100644 --- a/services/cronjobs/archived_repositories/src/database.ts +++ b/services/cronjobs/archived_repositories/src/database.ts @@ -33,16 +33,17 @@ export async function fetchRepositoryUrls(config: Config): Promise { try { const result = await client.query( - `SELECT repository FROM "segmentRepositories" + `SELECT url FROM public.repositories WHERE - (starts_with(repository, 'https://github.com/') OR starts_with(repository, 'https://gitlab.com/')) AND - (last_archived_check IS NULL OR last_archived_check < NOW() - INTERVAL '3 days') - ORDER BY last_archived_check + "deletedAt" IS NULL AND + (starts_with(url, 'https://github.com/') OR starts_with(url, 'https://gitlab.com/')) AND + ("lastArchivedCheckAt" IS NULL OR "lastArchivedCheckAt" < NOW() - INTERVAL '3 days') + ORDER BY "lastArchivedCheckAt" NULLS FIRST LIMIT $1`, [config.BatchSize], ) - return result.rows.map((row) => row.repository) + return result.rows.map((row) => row.url) } catch (error) { console.error('Error fetching repository URLs:', error) throw error @@ -58,21 +59,12 @@ export async function updateRepositoryStatus( const client = getPool(config) try { - await Promise.all([ - // TODO: stop writing to segmentRepositories post migration - client.query( - `UPDATE "segmentRepositories" - SET archived = $1, excluded = $2, last_archived_check = NOW(), updated_at = NOW() - WHERE repository = $3`, - [isArchived, isExcluded, repository], - ), - client.query( - `UPDATE "repositories" - SET "archived" = $1, "excluded" = $2, "lastArchivedCheckAt" = NOW(), "updatedAt" = NOW() - WHERE "url" = $3`, - [isArchived, isExcluded, repository], - ), - ]) + await client.query( + `UPDATE public.repositories + SET "archived" = $1, "excluded" = $2, "lastArchivedCheckAt" = NOW(), "updatedAt" = NOW() + WHERE "url" = $3 AND "deletedAt" IS NULL`, + [isArchived, isExcluded, repository], + ) } catch (error) { console.error(`Error updating repository status for ${repository}:`, error) throw error From b3728f43a30e2b1ad0f6f9beb3f255ac5177e660 Mon Sep 17 00:00:00 2001 From: Mouad BANI Date: Fri, 23 Jan 2026 13:04:27 +0100 Subject: [PATCH 13/17] fix: legacy github repos sync --- backend/src/services/integrationService.ts | 13 +++++----- .../src/services/integration.service.ts | 6 ----- .../src/integrations/index.ts | 24 ------------------- 3 files changed, 7 insertions(+), 36 deletions(-) diff --git a/backend/src/services/integrationService.ts b/backend/src/services/integrationService.ts index 4f94abcf08..5aa238126c 100644 --- a/backend/src/services/integrationService.ts +++ b/backend/src/services/integrationService.ts @@ -58,7 +58,7 @@ import { } from '@/serverless/integrations/usecases/groupsio/types' import { DISCORD_CONFIG, GITHUB_CONFIG, GITLAB_CONFIG, IS_TEST_ENV, KUBE_MODE } from '../conf/index' -import IntegrationRepository from '../database/repositories/integrationRepository' +import IntegrationRepository from '../database/repositories/integrationRepository' import SequelizeRepository from '../database/repositories/sequelizeRepository' import telemetryTrack from '../segment/telemetryTrack' import track from '../segment/track' @@ -938,9 +938,6 @@ export default class IntegrationService { ) } - // sync to public.repositories - await txService.mapUnifiedRepositories(PlatformType.GITHUB_NANGO, integration.id, mapping) - if (!existingTransaction) { await SequelizeRepository.commitTransaction(transaction) } @@ -989,7 +986,7 @@ export default class IntegrationService { const collectionService = new CollectionService(txOptions) - // Note: Repos are written to public.repositories via gitConnectOrUpdate -> mapUnifiedRepositories + // Note: Repos are synced to public.repositories via mapUnifiedRepositories at the end of this method // Get integration settings to access forkedFrom data from all orgs const integration = await IntegrationRepository.findById(integrationId, txOptions) @@ -1046,6 +1043,10 @@ export default class IntegrationService { } } + // sync to public.repositories + const txService = new IntegrationService(txOptions) + await txService.mapUnifiedRepositories(integration.platform, integrationId, mapping) + if (fireOnboarding) { this.options.log.info('Updating integration status to in-progress!') const integration = await IntegrationRepository.update( @@ -3164,7 +3165,7 @@ export default class IntegrationService { const isGitHubPlatform = [PlatformType.GITHUB, PlatformType.GITHUB_NANGO].includes( sourcePlatform, ) - const sourceIntegration = isGitHubPlatform ? IntegrationRepository.findById(sourceIntegrationId, txOptions) : null + const sourceIntegration = isGitHubPlatform ? await IntegrationRepository.findById(sourceIntegrationId, txOptions) : null if (sourceIntegration?.settings?.orgs) { const allRepos = sourceIntegration.settings.orgs.flatMap((org: any) => org.repos || []) for (const repo of allRepos) { diff --git a/services/libs/common_services/src/services/integration.service.ts b/services/libs/common_services/src/services/integration.service.ts index 3ab6ddaed4..40c22df72d 100644 --- a/services/libs/common_services/src/services/integration.service.ts +++ b/services/libs/common_services/src/services/integration.service.ts @@ -8,7 +8,6 @@ import { fetchIntegrationById, findNangoRepositoriesToBeRemoved, findRepositoriesForSegment, - removePlainGitlabRepoMapping, } from '@crowd/data-access-layer/src/integrations' import { QueryExecutor } from '@crowd/data-access-layer/src/queryExecutor' import { getRepoUrlsMappedToOtherSegments } from '@crowd/data-access-layer/src/segments' @@ -129,11 +128,6 @@ export class CommonIntegrationService { // Find repos already mapped to other segments (conflicts) const alreadyMappedRepos = await getRepoUrlsMappedToOtherSegments(qx, currentUrls, segmentId) - // Unmap repositories that should be removed - for (const repo of reposToBeRemoved) { - await removePlainGitlabRepoMapping(qx, redis, integrationId, repo) - } - // Filter valid repositories (dedupe, remove deleted, remove already mapped to other segments) const repositories = [...new Set(currentUrls)].filter( (url) => !reposToBeRemoved.includes(url) && !alreadyMappedRepos.includes(url), diff --git a/services/libs/data-access-layer/src/integrations/index.ts b/services/libs/data-access-layer/src/integrations/index.ts index 0ae01a72e2..046345bef0 100644 --- a/services/libs/data-access-layer/src/integrations/index.ts +++ b/services/libs/data-access-layer/src/integrations/index.ts @@ -549,30 +549,6 @@ export async function addRepoToGitIntegration( log.info({ integrationId: gitIntegration.id, repoUrl }, 'Added repo to git integration settings!') } -export async function removePlainGitlabRepoMapping( - qx: QueryExecutor, - redisClient: RedisClient, - integrationId: string, - repo: string, -): Promise { - await qx.result( - ` - update public.repositories - set "deletedAt" = now(), "updatedAt" = now() - where "sourceIntegrationId" = $(integrationId) - and lower(url) = lower($(repo)) - and "deletedAt" is null - `, - { - integrationId, - repo, - }, - ) - - const cache = new RedisCache('repoSegmentLookup', redisClient, log) - await cache.deleteAll() -} - export function extractGithubRepoSlug(url: string): string { const parsedUrl = new URL(url) const pathname = parsedUrl.pathname From 3c0bbde2728a39657da8c05bf547ea116fae6f9d Mon Sep 17 00:00:00 2001 From: Mouad BANI Date: Fri, 23 Jan 2026 13:14:21 +0100 Subject: [PATCH 14/17] fix: lint --- services/libs/data-access-layer/src/integrations/index.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/services/libs/data-access-layer/src/integrations/index.ts b/services/libs/data-access-layer/src/integrations/index.ts index 046345bef0..e4253d70cd 100644 --- a/services/libs/data-access-layer/src/integrations/index.ts +++ b/services/libs/data-access-layer/src/integrations/index.ts @@ -1,5 +1,4 @@ import { getServiceChildLogger } from '@crowd/logging' -import { RedisCache, RedisClient } from '@crowd/redis' import { IIntegration, PlatformType } from '@crowd/types' import { QueryExecutor } from '../queryExecutor' From 674c0d97a610a030ea8a0c58a8604908a507a38b Mon Sep 17 00:00:00 2001 From: Mouad BANI Date: Fri, 23 Jan 2026 14:34:35 +0100 Subject: [PATCH 15/17] fix: lint --- backend/src/services/integrationService.ts | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/backend/src/services/integrationService.ts b/backend/src/services/integrationService.ts index 5aa238126c..4ac4fb75d6 100644 --- a/backend/src/services/integrationService.ts +++ b/backend/src/services/integrationService.ts @@ -5,8 +5,7 @@ import axios, { AxiosRequestConfig, AxiosResponse } from 'axios' import lodash from 'lodash' import moment from 'moment' import { QueryTypes, Transaction } from 'sequelize' -import { generateUUIDv4 as uuid } from '@crowd/common' -import { EDITION, Error400, Error404, Error500, Error542, encryptData } from '@crowd/common' +import { generateUUIDv4 as uuid, EDITION, Error400, Error404, Error542, encryptData } from '@crowd/common' import { CommonIntegrationService, getGithubInstallationToken } from '@crowd/common_services' import { ICreateInsightsProject } from '@crowd/data-access-layer/src/collections' import { findRepositoriesForSegment } from '@crowd/data-access-layer/src/integrations' @@ -515,8 +514,6 @@ export default class IntegrationService { const collectionService = new CollectionService({ ...this.options, transaction }) - const qx = SequelizeRepository.getQueryExecutor(this.options) - let insightsProject = null let widgets = [] @@ -983,9 +980,6 @@ export default class IntegrationService { }, {}, ) - - const collectionService = new CollectionService(txOptions) - // Note: Repos are synced to public.repositories via mapUnifiedRepositories at the end of this method // Get integration settings to access forkedFrom data from all orgs @@ -1397,11 +1391,6 @@ export default class IntegrationService { } const currentSegmentId = (options || this.options).currentSegments[0].id - const qx = SequelizeRepository.getQueryExecutor({ - ...(options || this.options), - transaction, - }) - // sync to public.repositories (only for direct GIT connections, other platforms handle it themselves) if (!sourcePlatform) { const mapping = remotes.reduce( From 77c9083856717c357a4e4b8fe919257983e87bdd Mon Sep 17 00:00:00 2001 From: Mouad BANI Date: Fri, 23 Jan 2026 14:38:53 +0100 Subject: [PATCH 16/17] fix: format --- backend/src/services/integrationService.ts | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/backend/src/services/integrationService.ts b/backend/src/services/integrationService.ts index 4ac4fb75d6..7461dfdff9 100644 --- a/backend/src/services/integrationService.ts +++ b/backend/src/services/integrationService.ts @@ -5,7 +5,15 @@ import axios, { AxiosRequestConfig, AxiosResponse } from 'axios' import lodash from 'lodash' import moment from 'moment' import { QueryTypes, Transaction } from 'sequelize' -import { generateUUIDv4 as uuid, EDITION, Error400, Error404, Error542, encryptData } from '@crowd/common' + +import { + EDITION, + Error400, + Error404, + Error542, + encryptData, + generateUUIDv4 as uuid, +} from '@crowd/common' import { CommonIntegrationService, getGithubInstallationToken } from '@crowd/common_services' import { ICreateInsightsProject } from '@crowd/data-access-layer/src/collections' import { findRepositoriesForSegment } from '@crowd/data-access-layer/src/integrations' @@ -3154,7 +3162,9 @@ export default class IntegrationService { const isGitHubPlatform = [PlatformType.GITHUB, PlatformType.GITHUB_NANGO].includes( sourcePlatform, ) - const sourceIntegration = isGitHubPlatform ? await IntegrationRepository.findById(sourceIntegrationId, txOptions) : null + const sourceIntegration = isGitHubPlatform + ? await IntegrationRepository.findById(sourceIntegrationId, txOptions) + : null if (sourceIntegration?.settings?.orgs) { const allRepos = sourceIntegration.settings.orgs.flatMap((org: any) => org.repos || []) for (const repo of allRepos) { @@ -3172,7 +3182,6 @@ export default class IntegrationService { const insightsProjectId = insightsProjectMap.get(segmentId) const gitIntegrationId = gitIntegrationMap.get(segmentId) - payloads.push({ id, url, From 6ba2fc559de11d2853a0e6213a032e35860a093c Mon Sep 17 00:00:00 2001 From: Mouad BANI Date: Fri, 23 Jan 2026 15:51:02 +0100 Subject: [PATCH 17/17] fix: using gitIntegrationId instead of sourceIntegration when checking git integration duplicates --- backend/src/services/integrationService.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/src/services/integrationService.ts b/backend/src/services/integrationService.ts index 7461dfdff9..3ed0279c91 100644 --- a/backend/src/services/integrationService.ts +++ b/backend/src/services/integrationService.ts @@ -1372,7 +1372,7 @@ export default class IntegrationService { const existingRows = await seq.query( ` - SELECT url, "sourceIntegrationId" AS "integrationId" FROM repositories + SELECT url, "gitIntegrationId" AS "integrationId" FROM repositories WHERE url IN (:urls) AND "deletedAt" IS NULL `, {