Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion appinfo/info.xml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ Refer to the [Context Chat Backend's readme](https://github.com/nextcloud/contex
</dependencies>
<background-jobs>
<job>OCA\ContextChat\BackgroundJobs\FileSystemListenerJob</job>
<job>OCA\ContextChat\BackgroundJobs\ActionJob</job>
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why this change?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it doesn't exist now, should be done in the feat/reverse-content-flow branch too off of which this one is based

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ah, didn't see that

<job>OCA\ContextChat\BackgroundJobs\RotateLogsJob</job>
</background-jobs>
<commands>
Expand Down
29 changes: 29 additions & 0 deletions lib/AppInfo/Application.php
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,35 @@ class Application extends App implements IBootstrap {
'text/org',
];

/**
 * MIME type strings identifying image files.
 *
 * NOTE(review): presumably these are the image types offered for indexation
 * when an OCR task provider is available — the consumer is not visible here,
 * confirm against TaskTypeService.
 *
 * @var list<string>
 */
public const IMAGE_MIMETYPES = [
'image/bmp',
'image/bpg',
'image/emf',
'image/gif',
'image/heic',
'image/heif',
'image/jp2',
'image/jpeg',
'image/png',
'image/svg+xml',
'image/tga',
'image/tiff',
'image/webp',
'image/x-dcraw',
'image/x-icon',
];

/**
 * MIME type strings identifying audio files.
 *
 * NOTE(review): presumably these are the audio types offered for indexation
 * when a speech-to-text task provider is available — the consumer is not
 * visible here, confirm against TaskTypeService.
 *
 * NOTE(review): 'audio/x-scpls' is commonly the type of PLS playlist files
 * (text lists of stream URLs) rather than audio data — confirm it is meant to
 * be in this list.
 *
 * @var list<string>
 */
public const AUDIO_MIMETYPES = [
'audio/aac',
'audio/flac',
'audio/mp4',
'audio/mpeg',
'audio/ogg',
'audio/wav',
'audio/webm',
'audio/x-scpls',
];

/**
 * Passes this app's id (self::APP_ID) together with the given URL parameters
 * to the parent constructor.
 *
 * @param array $urlParams forwarded unchanged to the parent constructor
 */
public function __construct(array $urlParams = []) {
parent::__construct(self::APP_ID, $urlParams);
}
Expand Down
9 changes: 7 additions & 2 deletions lib/BackgroundJobs/StorageCrawlJob.php
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,13 @@

namespace OCA\ContextChat\BackgroundJobs;

use OCA\ContextChat\AppInfo\Application;
use OCA\ContextChat\Db\QueueFile;
use OCA\ContextChat\Logger;
use OCA\ContextChat\Service\DiagnosticService;
use OCA\ContextChat\Service\QueueService;
use OCA\ContextChat\Service\StorageService;
use OCA\ContextChat\Service\TaskTypeService;
use OCP\AppFramework\Services\IAppConfig;
use OCP\AppFramework\Utility\ITimeFactory;
use OCP\BackgroundJob\IJobList;
Expand All @@ -33,19 +35,22 @@ public function __construct(
private StorageService $storageService,
private DiagnosticService $diagnosticService,
private IAppConfig $appConfig,
private TaskTypeService $taskTypeService,
) {
parent::__construct($timeFactory);
}

/**
* @param array{storage_id:int, root_id:int, overridden_root:int|null, override_root:int|null, last_file_id:int} $argument
* @param array{storage_id:int, root_id:int, overridden_root:int|null, override_root:int|null, last_file_id:int, only_non_textual?:bool} $argument
* @return void
*/
protected function run($argument): void {
$storageId = $argument['storage_id'];
$rootId = $argument['root_id'];
$overrideRoot = $argument['overridden_root'] ?? $argument['override_root'] ?? $rootId;
$lastFileId = $argument['last_file_id'];
$onlyNonTextual = $argument['only_non_textual'] ?? false;
$mimeTypes = $this->taskTypeService->getMultimodalMimetypes(!$onlyNonTextual);

// Remove current iteration
$this->jobList->remove(self::class, $argument);
Expand All @@ -56,7 +61,7 @@ protected function run($argument): void {

$mountFilesCount = 0;
$lastSuccessfulFileId = -1;
foreach ($this->storageService->getFilesInMount($storageId, $overrideRoot ?? $rootId, $lastFileId, self::BATCH_SIZE) as $fileId) {
foreach ($this->storageService->getFilesInMount($storageId, $overrideRoot ?? $rootId, $lastFileId, self::BATCH_SIZE, $mimeTypes) as $fileId) {
$queueFile = new QueueFile();
$queueFile->setStorageId($storageId);
$queueFile->setRootId($rootId);
Expand Down
6 changes: 4 additions & 2 deletions lib/Listener/ShareListener.php
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,12 @@

namespace OCA\ContextChat\Listener;

use OCA\ContextChat\AppInfo\Application;
use OCA\ContextChat\Logger;
use OCA\ContextChat\Public\UpdateAccessOp;
use OCA\ContextChat\Service\ActionScheduler;
use OCA\ContextChat\Service\ProviderConfigService;
use OCA\ContextChat\Service\StorageService;
use OCA\ContextChat\Service\TaskTypeService;
use OCP\EventDispatcher\Event;
use OCP\EventDispatcher\IEventListener;
use OCP\Files\FileInfo;
Expand All @@ -37,6 +37,7 @@ public function __construct(
private IManager $shareManager,
private ActionScheduler $actionService,
private IGroupManager $groupManager,
private TaskTypeService $taskTypeService,
) {
}

Expand Down Expand Up @@ -145,6 +146,7 @@ public function handle(Event $event): void {

private function allowedMimeType(Node $file): bool {
$mimeType = $file->getMimeType();
return in_array($mimeType, Application::MIMETYPES, true);
$mimeTypes = $this->taskTypeService->getMultimodalMimetypes();
return in_array($mimeType, $mimeTypes, true);
}
}
68 changes: 68 additions & 0 deletions lib/Migration/Version006000000Date20260316135634.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
<?php

declare(strict_types=1);

/**
* SPDX-FileCopyrightText: 2026 Nextcloud GmbH and Nextcloud contributors
* SPDX-License-Identifier: AGPL-3.0-or-later
*/

namespace OCA\ContextChat\Migration;

use Closure;
use OCA\ContextChat\BackgroundJobs\StorageCrawlJob;
use OCA\ContextChat\Logger;
use OCA\ContextChat\Service\StorageService;
use OCA\ContextChat\Service\TaskTypeService;
use OCP\BackgroundJob\IJobList;
use OCP\Migration\IOutput;
use OCP\Migration\SimpleMigrationStep;

/**
 * Migration step that schedules one StorageCrawlJob per mount, flagged with
 * `only_non_textual`, so that already existing image and audio files are
 * looked up for indexation.
 */
class Version006000000Date20260316135634 extends SimpleMigrationStep {
	public function __construct(
		private TaskTypeService $taskTypeService,
		private StorageService $storageService,
		private IJobList $jobList,
		private Logger $logger,
	) {
	}

	/**
	 * @return string short human-readable title of this migration step
	 */
	public function name(): string {
		return 'Queue existing multimodal files (Images and Audio) for indexation.';
	}

	/**
	 * @return string longer human-readable explanation of this migration step
	 */
	public function description(): string {
		return 'This migration queues existing multimodal files (Images and Audio) for indexation.'
			. ' Each type of files is queued only if the required TaskProcessing task provider is available.'
			. ' OCR for Images and Speech-to-text for Audio.'
			. ' See https://docs.nextcloud.com/server/latest/admin_manual/ai/overview.html for more information.';
	}

	/**
	 * Warns about unavailable task types, then adds a StorageCrawlJob for every
	 * mount returned by the storage service.
	 *
	 * Failures while listing mounts or adding jobs are logged and reported as a
	 * warning on $output; they do not abort the migration.
	 *
	 * @param IOutput $output receives the warnings and the final info message
	 * @param Closure $schemaClosure unused by this step
	 * @param array $options unused by this step
	 * @return void
	 */
	public function postSchemaChange(IOutput $output, Closure $schemaClosure, array $options): void {
		// NOTE(review): the checks below only warn; crawl jobs are scheduled even
		// when neither task type is available — confirm this is intended.
		if (!$this->taskTypeService->isOcrTaskTypeAvailable()) {
			$output->warning('[Context Chat] OCR task type is not available, image files will not be indexed.');
		}
		if (!$this->taskTypeService->isSpeechToTextTaskTypeAvailable()) {
			$output->warning('[Context Chat] Speech-to-text task type is not available, audio files will not be indexed.');
		}

		try {
			foreach ($this->storageService->getMounts() as $mount) {
				// Fields are space separated and a missing overridden root is
				// printed as "null" so the log line stays unambiguous.
				$this->logger->debug(
					'Scheduling StorageCrawlJob storage_id=' . $mount['storage_id']
					. ' root_id=' . $mount['root_id']
					. ' overridden_root=' . ($mount['overridden_root'] ?? 'null')
				);
				$this->jobList->add(StorageCrawlJob::class, [
					'storage_id' => $mount['storage_id'],
					'root_id' => $mount['root_id'],
					'overridden_root' => $mount['overridden_root'],
					// Start crawling from the beginning of the mount.
					'last_file_id' => 0,
					'only_non_textual' => true,
				]);
			}
		} catch (\Exception $e) {
			$this->logger->error('Failed to schedule StorageCrawlJob to find files for indexation.', ['exception' => $e]);
			$output->warning('Failed to schedule StorageCrawlJob to find files for indexation: ' . $e->getMessage());
			return;
		}

		$output->info('Multimodal files have been scheduled to be queued for indexation.');
	}
}
7 changes: 3 additions & 4 deletions lib/Service/FsEventService.php
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,11 @@

namespace OCA\ContextChat\Service;

use OCA\ContextChat\AppInfo\Application;
use OCA\ContextChat\Db\QueueFile;
use OCA\ContextChat\Logger;
use OCP\DB\Exception;
use OCP\Files\Folder;
use OCP\Files\InvalidPathException;
use OCP\Files\IRootFolder;
use OCP\Files\Node;
use OCP\Files\NotFoundException;

Expand All @@ -24,7 +22,7 @@ public function __construct(
private QueueService $queue,
private ActionScheduler $actionService,
private StorageService $storageService,
private IRootFolder $rootFolder,
private TaskTypeService $taskTypeService,
) {

}
Expand Down Expand Up @@ -134,7 +132,8 @@ public function onInsert(Node $node, bool $recurse = true, bool $update = false)

private function allowedMimeType(Node $file): bool {
$mimeType = $file->getMimeType();
return in_array($mimeType, Application::MIMETYPES, true);
$mimeTypes = $this->taskTypeService->getMultimodalMimetypes();
return in_array($mimeType, $mimeTypes, true);
}

private function allowedPath(Node $file): bool {
Expand Down
48 changes: 36 additions & 12 deletions lib/Service/StorageService.php
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
use OCP\Files\Config\IUserMountCache;
use OCP\Files\Folder;
use OCP\Files\IMimeTypeLoader;
use OCP\Files\IRootFolder;
use OCP\Files\Node;
use OCP\FilesMetadata\IFilesMetadataManager;
use OCP\IDBConnection;
Expand All @@ -44,8 +43,8 @@ public function __construct(
private IMimeTypeLoader $mimeTypes,
private IUserMountCache $userMountCache,
private IFilesMetadataManager $metadataManager,
private IRootFolder $rootFolder,
private IFileAccess $fileAccess,
private TaskTypeService $taskTypeService,
) {
}

Expand Down Expand Up @@ -84,7 +83,8 @@ public function countFilesInMount(int $storageId, int $rootId): int {
return 0;
}

$mimeTypes = array_map(fn ($mimeType) => $this->mimeTypes->getId($mimeType), Application::MIMETYPES);
$mimeTypes = $this->taskTypeService->getMultimodalMimetypes();
$mimeTypesIds = array_map(fn ($mimeType) => $this->mimeTypes->getId($mimeType), $mimeTypes);

$qb = $this->getCacheQueryBuilder();

Expand All @@ -110,7 +110,7 @@ public function countFilesInMount(int $storageId, int $rootId): int {
->andWhere($qb->expr()->notLike('filecache.path', $qb->createNamedParameter('files_versions/%')))
->andWhere($qb->expr()->notLike('filecache.path', $qb->createNamedParameter('files_trashbin/%')))
->andWhere($qb->expr()->eq('filecache.storage', $qb->createNamedParameter($storageId)))
->andWhere($qb->expr()->in('filecache.mimetype', $qb->createNamedParameter($mimeTypes, IQueryBuilder::PARAM_INT_ARRAY)))
->andWhere($qb->expr()->in('filecache.mimetype', $qb->createNamedParameter($mimeTypesIds, IQueryBuilder::PARAM_INT_ARRAY)))
->andWhere($qb->expr()->lte('filecache.size', $qb->createNamedParameter(Application::CC_MAX_SIZE, IQueryBuilder::PARAM_INT)))
->andWhere($qb->expr()->gt('filecache.size', $qb->createNamedParameter(0, IQueryBuilder::PARAM_INT)));
$result = $qb->executeQuery();
Expand Down Expand Up @@ -199,25 +199,42 @@ private function getMountsOld(): \Generator {
* @param int $rootId
* @param int $lastFileId
* @param int $maxResults
* @param list<string> $mimeTypes
* @return \Generator<int,int,mixed,void>
*/
public function getFilesInMount(int $storageId, int $rootId, int $lastFileId = 0, int $maxResults = 100): \Generator {
public function getFilesInMount(
int $storageId,
int $rootId,
int $lastFileId = 0,
int $maxResults = 100,
array $mimeTypes = [],
): \Generator {
if ($mimeTypes === []) {
$mimeTypes = $this->taskTypeService->getMultimodalMimetypes();
}
if (!$this->isFileAccessAvailable()) {
return $this->getFilesInMountOld($storageId, $rootId, $lastFileId, $maxResults);
return $this->getFilesInMountOld($storageId, $rootId, $lastFileId, $maxResults, $mimeTypes);
}

return $this->getFilesInMountUsingFileAccess($storageId, $rootId, $lastFileId, $maxResults);
return $this->getFilesInMountUsingFileAccess($storageId, $rootId, $lastFileId, $maxResults, $mimeTypes);
}

/**
* @param int $storageId
* @param int $rootId
* @param int $lastFileId
* @param int $maxResults
* @param list<string> $mimeTypes
* @return \Generator<int,int,mixed,void>
*/
private function getFilesInMountUsingFileAccess(int $storageId, int $rootId, int $lastFileId = 0, int $maxResults = 100): \Generator {
$mimeTypeIds = array_map(fn ($mimeType) => $this->mimeTypes->getId($mimeType), Application::MIMETYPES);
private function getFilesInMountUsingFileAccess(
int $storageId,
int $rootId,
int $lastFileId = 0,
int $maxResults = 100,
array $mimeTypes = Application::MIMETYPES,
): \Generator {
$mimeTypeIds = array_map(fn ($mimeType) => $this->mimeTypes->getId($mimeType), $mimeTypes);
foreach ($this->fileAccess->getByAncestorInStorage($storageId, $rootId, $lastFileId, $maxResults, $mimeTypeIds, false, true) as $cacheEntry) {
yield $cacheEntry['fileid'];
}
Expand All @@ -228,9 +245,16 @@ private function getFilesInMountUsingFileAccess(int $storageId, int $rootId, int
* @param int $rootId
* @param int $lastFileId
* @param int $maxResults
* @param list<string> $mimeTypes
* @return \Generator<int,int,mixed,void>
*/
private function getFilesInMountOld(int $storageId, int $rootId, int $lastFileId = 0, int $maxResults = 100): \Generator {
private function getFilesInMountOld(
int $storageId,
int $rootId,
int $lastFileId = 0,
int $maxResults = 100,
array $mimeTypes = Application::MIMETYPES,
): \Generator {
$qb = $this->getCacheQueryBuilder();
try {
$qb->selectFileCache();
Expand All @@ -249,7 +273,7 @@ private function getFilesInMountOld(int $storageId, int $rootId, int $lastFileId
return;
}

$mimeTypes = array_map(fn ($mimeType) => $this->mimeTypes->getId($mimeType), Application::MIMETYPES);
$mimeTypesIds = array_map(fn ($mimeType) => $this->mimeTypes->getId($mimeType), $mimeTypes);

$qb = $this->getCacheQueryBuilder();

Expand All @@ -272,7 +296,7 @@ private function getFilesInMountOld(int $storageId, int $rootId, int $lastFileId
->andWhere($qb->expr()->like('filecache.path', $qb->createNamedParameter($path . '%')))
->andWhere($qb->expr()->eq('filecache.storage', $qb->createNamedParameter($storageId)))
->andWhere($qb->expr()->gt('filecache.fileid', $qb->createNamedParameter($lastFileId)))
->andWhere($qb->expr()->in('filecache.mimetype', $qb->createNamedParameter($mimeTypes, IQueryBuilder::PARAM_INT_ARRAY)));
->andWhere($qb->expr()->in('filecache.mimetype', $qb->createNamedParameter($mimeTypesIds, IQueryBuilder::PARAM_INT_ARRAY)));

if ($maxResults !== 0) {
$qb->setMaxResults($maxResults);
Expand Down
Loading
Loading