diff --git a/appinfo/info.xml b/appinfo/info.xml
index 9394a0a..744f35e 100644
--- a/appinfo/info.xml
+++ b/appinfo/info.xml
@@ -44,7 +44,6 @@ Refer to the [Context Chat Backend's readme](https://github.com/nextcloud/contex
<job>OCA\ContextChat\BackgroundJobs\FileSystemListenerJob</job>
- <job>OCA\ContextChat\BackgroundJobs\ActionJob</job>
<job>OCA\ContextChat\BackgroundJobs\RotateLogsJob</job>
diff --git a/lib/AppInfo/Application.php b/lib/AppInfo/Application.php
index 0ea6b81..24e87cb 100644
--- a/lib/AppInfo/Application.php
+++ b/lib/AppInfo/Application.php
@@ -61,6 +61,35 @@ class Application extends App implements IBootstrap {
'text/org',
];
+ public const IMAGE_MIMETYPES = [ // image types; TaskTypeService::getMultimodalMimetypes() includes them only while its OCR availability check passes
+ 'image/bmp',
+ 'image/bpg',
+ 'image/emf',
+ 'image/gif',
+ 'image/heic',
+ 'image/heif',
+ 'image/jp2',
+ 'image/jpeg',
+ 'image/png',
+ 'image/svg+xml',
+ 'image/tga',
+ 'image/tiff',
+ 'image/webp',
+ 'image/x-dcraw',
+ 'image/x-icon',
+ ];
+
+ public const AUDIO_MIMETYPES = [ // audio types; TaskTypeService::getMultimodalMimetypes() includes them only while its speech-to-text availability check passes
+ 'audio/aac',
+ 'audio/flac',
+ 'audio/mp4',
+ 'audio/mpeg',
+ 'audio/ogg',
+ 'audio/wav',
+ 'audio/webm',
+ 'audio/x-scpls', // NOTE(review): this is the PLS playlist mimetype, not audio data - confirm it should be sent to speech-to-text
+ ];
+
public function __construct(array $urlParams = []) {
parent::__construct(self::APP_ID, $urlParams);
}
diff --git a/lib/BackgroundJobs/StorageCrawlJob.php b/lib/BackgroundJobs/StorageCrawlJob.php
index 1447fed..9436e40 100644
--- a/lib/BackgroundJobs/StorageCrawlJob.php
+++ b/lib/BackgroundJobs/StorageCrawlJob.php
@@ -10,11 +10,13 @@
namespace OCA\ContextChat\BackgroundJobs;
+use OCA\ContextChat\AppInfo\Application;
use OCA\ContextChat\Db\QueueFile;
use OCA\ContextChat\Logger;
use OCA\ContextChat\Service\DiagnosticService;
use OCA\ContextChat\Service\QueueService;
use OCA\ContextChat\Service\StorageService;
+use OCA\ContextChat\Service\TaskTypeService;
use OCP\AppFramework\Services\IAppConfig;
use OCP\AppFramework\Utility\ITimeFactory;
use OCP\BackgroundJob\IJobList;
@@ -33,12 +35,13 @@ public function __construct(
private StorageService $storageService,
private DiagnosticService $diagnosticService,
private IAppConfig $appConfig,
+ private TaskTypeService $taskTypeService,
) {
parent::__construct($timeFactory);
}
/**
- * @param array{storage_id:int, root_id:int, overridden_root:int|null, override_root:int|null, last_file_id:int} $argument
+ * @param array{storage_id:int, root_id:int, overridden_root:int|null, override_root:int|null, last_file_id:int, only_non_textual?:bool} $argument
* @return void
*/
protected function run($argument): void {
@@ -46,6 +49,8 @@ protected function run($argument): void {
$rootId = $argument['root_id'];
$overrideRoot = $argument['overridden_root'] ?? $argument['override_root'] ?? $rootId;
$lastFileId = $argument['last_file_id'];
+ $onlyNonTextual = $argument['only_non_textual'] ?? false;
+ $mimeTypes = $this->taskTypeService->getMultimodalMimetypes(!$onlyNonTextual);
// Remove current iteration
$this->jobList->remove(self::class, $argument);
@@ -56,7 +61,7 @@ protected function run($argument): void {
$mountFilesCount = 0;
$lastSuccessfulFileId = -1;
- foreach ($this->storageService->getFilesInMount($storageId, $overrideRoot ?? $rootId, $lastFileId, self::BATCH_SIZE) as $fileId) {
+ foreach ($this->storageService->getFilesInMount($storageId, $overrideRoot ?? $rootId, $lastFileId, self::BATCH_SIZE, $mimeTypes) as $fileId) {
$queueFile = new QueueFile();
$queueFile->setStorageId($storageId);
$queueFile->setRootId($rootId);
diff --git a/lib/Listener/ShareListener.php b/lib/Listener/ShareListener.php
index 528bd4f..6f76805 100644
--- a/lib/Listener/ShareListener.php
+++ b/lib/Listener/ShareListener.php
@@ -10,12 +10,12 @@
namespace OCA\ContextChat\Listener;
-use OCA\ContextChat\AppInfo\Application;
use OCA\ContextChat\Logger;
use OCA\ContextChat\Public\UpdateAccessOp;
use OCA\ContextChat\Service\ActionScheduler;
use OCA\ContextChat\Service\ProviderConfigService;
use OCA\ContextChat\Service\StorageService;
+use OCA\ContextChat\Service\TaskTypeService;
use OCP\EventDispatcher\Event;
use OCP\EventDispatcher\IEventListener;
use OCP\Files\FileInfo;
@@ -37,6 +37,7 @@ public function __construct(
private IManager $shareManager,
private ActionScheduler $actionService,
private IGroupManager $groupManager,
+ private TaskTypeService $taskTypeService,
) {
}
@@ -145,6 +146,7 @@ public function handle(Event $event): void {
private function allowedMimeType(Node $file): bool {
$mimeType = $file->getMimeType();
- return in_array($mimeType, Application::MIMETYPES, true);
+ $mimeTypes = $this->taskTypeService->getMultimodalMimetypes(); // textual types plus the image/audio types whose task type check currently passes
+ return in_array($mimeType, $mimeTypes, true); // strict comparison: the mimetype must match an entry exactly
}
}
diff --git a/lib/Migration/Version006000000Date20260316135634.php b/lib/Migration/Version006000000Date20260316135634.php
new file mode 100644
index 0000000..5ed9c91
--- /dev/null
+++ b/lib/Migration/Version006000000Date20260316135634.php
@@ -0,0 +1,68 @@
+taskTypeService->isOcrTaskTypeAvailable()) {
+ $output->warning('[Context Chat] OCR task type is not available, image files will not be indexed.');
+ }
+ if (!$this->taskTypeService->isSpeechToTextTaskTypeAvailable()) {
+ $output->warning('[Context Chat] Speech-to-text task type is not available, audio files will not be indexed.');
+ }
+
+ try {
+ 	// Queue one crawl per mount. `only_non_textual` makes StorageCrawlJob ask for the
+ 	// mimetype list without Application::MIMETYPES, i.e. only the image/audio types.
+ 	foreach ($this->storageService->getMounts() as $mount) {
+ 		$this->logger->debug('Scheduling StorageCrawlJob storage_id=' . $mount['storage_id'] . ' root_id=' . $mount['root_id'] . ' override_root=' . $mount['overridden_root']);
+ 		$this->jobList->add(StorageCrawlJob::class, [
+ 			'storage_id' => $mount['storage_id'],
+ 			'root_id' => $mount['root_id'],
+ 			'overridden_root' => $mount['overridden_root'],
+ 			'last_file_id' => 0,
+ 			'only_non_textual' => true,
+ 		]);
+ 	}
+ } catch (\Exception $e) {
+ 	// The failure is logged and reported as a warning instead of being rethrown.
+ 	$this->logger->error('Failed to schedule StorageCrawlJob to find files for indexation.', ['exception' => $e]);
+ 	$output->warning('Failed to schedule StorageCrawlJob to find files for indexation: ' . $e->getMessage());
+ 	return;
+ }
+
+ $output->info('Multimodal files have been scheduled to be queued for indexation.');
+ }
+}
diff --git a/lib/Service/FsEventService.php b/lib/Service/FsEventService.php
index 58adb0f..c6bc432 100644
--- a/lib/Service/FsEventService.php
+++ b/lib/Service/FsEventService.php
@@ -7,13 +7,11 @@
namespace OCA\ContextChat\Service;
-use OCA\ContextChat\AppInfo\Application;
use OCA\ContextChat\Db\QueueFile;
use OCA\ContextChat\Logger;
use OCP\DB\Exception;
use OCP\Files\Folder;
use OCP\Files\InvalidPathException;
-use OCP\Files\IRootFolder;
use OCP\Files\Node;
use OCP\Files\NotFoundException;
@@ -24,7 +22,7 @@ public function __construct(
private QueueService $queue,
private ActionScheduler $actionService,
private StorageService $storageService,
- private IRootFolder $rootFolder,
+ private TaskTypeService $taskTypeService,
) {
}
@@ -134,7 +132,8 @@ public function onInsert(Node $node, bool $recurse = true, bool $update = false)
private function allowedMimeType(Node $file): bool {
$mimeType = $file->getMimeType();
- return in_array($mimeType, Application::MIMETYPES, true);
+ $mimeTypes = $this->taskTypeService->getMultimodalMimetypes(); // textual types plus the image/audio types whose task type check currently passes
+ return in_array($mimeType, $mimeTypes, true); // strict comparison: the mimetype must match an entry exactly
}
private function allowedPath(Node $file): bool {
diff --git a/lib/Service/StorageService.php b/lib/Service/StorageService.php
index b937259..7709bea 100644
--- a/lib/Service/StorageService.php
+++ b/lib/Service/StorageService.php
@@ -20,7 +20,6 @@
use OCP\Files\Config\IUserMountCache;
use OCP\Files\Folder;
use OCP\Files\IMimeTypeLoader;
-use OCP\Files\IRootFolder;
use OCP\Files\Node;
use OCP\FilesMetadata\IFilesMetadataManager;
use OCP\IDBConnection;
@@ -44,8 +43,8 @@ public function __construct(
private IMimeTypeLoader $mimeTypes,
private IUserMountCache $userMountCache,
private IFilesMetadataManager $metadataManager,
- private IRootFolder $rootFolder,
private IFileAccess $fileAccess,
+ private TaskTypeService $taskTypeService,
) {
}
@@ -84,7 +83,8 @@ public function countFilesInMount(int $storageId, int $rootId): int {
return 0;
}
- $mimeTypes = array_map(fn ($mimeType) => $this->mimeTypes->getId($mimeType), Application::MIMETYPES);
+ $mimeTypes = $this->taskTypeService->getMultimodalMimetypes();
+ $mimeTypesIds = array_map(fn ($mimeType) => $this->mimeTypes->getId($mimeType), $mimeTypes);
$qb = $this->getCacheQueryBuilder();
@@ -110,7 +110,7 @@ public function countFilesInMount(int $storageId, int $rootId): int {
->andWhere($qb->expr()->notLike('filecache.path', $qb->createNamedParameter('files_versions/%')))
->andWhere($qb->expr()->notLike('filecache.path', $qb->createNamedParameter('files_trashbin/%')))
->andWhere($qb->expr()->eq('filecache.storage', $qb->createNamedParameter($storageId)))
- ->andWhere($qb->expr()->in('filecache.mimetype', $qb->createNamedParameter($mimeTypes, IQueryBuilder::PARAM_INT_ARRAY)))
+ ->andWhere($qb->expr()->in('filecache.mimetype', $qb->createNamedParameter($mimeTypesIds, IQueryBuilder::PARAM_INT_ARRAY)))
->andWhere($qb->expr()->lte('filecache.size', $qb->createNamedParameter(Application::CC_MAX_SIZE, IQueryBuilder::PARAM_INT)))
->andWhere($qb->expr()->gt('filecache.size', $qb->createNamedParameter(0, IQueryBuilder::PARAM_INT)));
$result = $qb->executeQuery();
@@ -199,14 +199,24 @@ private function getMountsOld(): \Generator {
* @param int $rootId
* @param int $lastFileId
* @param int $maxResults
+ * @param list<string> $mimeTypes
* @return \Generator
*/
- public function getFilesInMount(int $storageId, int $rootId, int $lastFileId = 0, int $maxResults = 100): \Generator {
+ public function getFilesInMount(
+ 	int $storageId,
+ 	int $rootId,
+ 	int $lastFileId = 0,
+ 	int $maxResults = 100,
+ 	?array $mimeTypes = null,
+ ): \Generator {
+ 	// Only an omitted (null) filter selects the default list from TaskTypeService.
+ 	$mimeTypes ??= $this->taskTypeService->getMultimodalMimetypes();
+ 	if ($mimeTypes === []) {
+ 		// An explicitly empty filter must match nothing. StorageCrawlJob passes one when it
+ 		// crawls for non-textual files only and neither image nor audio types are enabled;
+ 		// substituting the default list here would re-queue every textual file instead.
+ 		return (static function (): \Generator {
+ 			yield from [];
+ 		})();
+ 	}
if (!$this->isFileAccessAvailable()) {
- return $this->getFilesInMountOld($storageId, $rootId, $lastFileId, $maxResults);
+ return $this->getFilesInMountOld($storageId, $rootId, $lastFileId, $maxResults, $mimeTypes);
}
- return $this->getFilesInMountUsingFileAccess($storageId, $rootId, $lastFileId, $maxResults);
+ return $this->getFilesInMountUsingFileAccess($storageId, $rootId, $lastFileId, $maxResults, $mimeTypes);
}
/**
@@ -214,10 +224,17 @@ public function getFilesInMount(int $storageId, int $rootId, int $lastFileId = 0
* @param int $rootId
* @param int $lastFileId
* @param int $maxResults
+ * @param list<string> $mimeTypes
* @return \Generator
*/
- private function getFilesInMountUsingFileAccess(int $storageId, int $rootId, int $lastFileId = 0, int $maxResults = 100): \Generator {
- $mimeTypeIds = array_map(fn ($mimeType) => $this->mimeTypes->getId($mimeType), Application::MIMETYPES);
+ private function getFilesInMountUsingFileAccess(
+ int $storageId,
+ int $rootId,
+ int $lastFileId = 0,
+ int $maxResults = 100,
+ array $mimeTypes = Application::MIMETYPES,
+ ): \Generator {
+ $mimeTypeIds = array_map(fn ($mimeType) => $this->mimeTypes->getId($mimeType), $mimeTypes);
foreach ($this->fileAccess->getByAncestorInStorage($storageId, $rootId, $lastFileId, $maxResults, $mimeTypeIds, false, true) as $cacheEntry) {
yield $cacheEntry['fileid'];
}
@@ -228,9 +245,16 @@ private function getFilesInMountUsingFileAccess(int $storageId, int $rootId, int
* @param int $rootId
* @param int $lastFileId
* @param int $maxResults
+ * @param list<string> $mimeTypes
* @return \Generator
*/
- private function getFilesInMountOld(int $storageId, int $rootId, int $lastFileId = 0, int $maxResults = 100): \Generator {
+ private function getFilesInMountOld(
+ int $storageId,
+ int $rootId,
+ int $lastFileId = 0,
+ int $maxResults = 100,
+ array $mimeTypes = Application::MIMETYPES,
+ ): \Generator {
$qb = $this->getCacheQueryBuilder();
try {
$qb->selectFileCache();
@@ -249,7 +273,7 @@ private function getFilesInMountOld(int $storageId, int $rootId, int $lastFileId
return;
}
- $mimeTypes = array_map(fn ($mimeType) => $this->mimeTypes->getId($mimeType), Application::MIMETYPES);
+ $mimeTypesIds = array_map(fn ($mimeType) => $this->mimeTypes->getId($mimeType), $mimeTypes);
$qb = $this->getCacheQueryBuilder();
@@ -272,7 +296,7 @@ private function getFilesInMountOld(int $storageId, int $rootId, int $lastFileId
->andWhere($qb->expr()->like('filecache.path', $qb->createNamedParameter($path . '%')))
->andWhere($qb->expr()->eq('filecache.storage', $qb->createNamedParameter($storageId)))
->andWhere($qb->expr()->gt('filecache.fileid', $qb->createNamedParameter($lastFileId)))
- ->andWhere($qb->expr()->in('filecache.mimetype', $qb->createNamedParameter($mimeTypes, IQueryBuilder::PARAM_INT_ARRAY)));
+ ->andWhere($qb->expr()->in('filecache.mimetype', $qb->createNamedParameter($mimeTypesIds, IQueryBuilder::PARAM_INT_ARRAY)));
if ($maxResults !== 0) {
$qb->setMaxResults($maxResults);
diff --git a/lib/Service/TaskTypeService.php b/lib/Service/TaskTypeService.php
new file mode 100644
index 0000000..cbe0bc5
--- /dev/null
+++ b/lib/Service/TaskTypeService.php
@@ -0,0 +1,59 @@
+taskProcessingManager->getPreferredProvider(self::OCR_TASK_TYPE);
+ return true;
+ } catch (\Exception $e) {
+ $this->logger->debug('OCR task type is not available: ' . $e->getMessage());
+ return false;
+ }
+ }
+
+ public function isSpeechToTextTaskTypeAvailable(): bool {
+ 	// Resolving the preferred provider doubles as the availability probe:
+ 	// any exception it raises is taken to mean "not available".
+ 	$available = true;
+ 	try {
+ 		$this->taskProcessingManager->getPreferredProvider(self::SPEECH_TO_TEXT_TASK_TYPE);
+ 	} catch (\Exception $exception) {
+ 		$this->logger->debug('Speech-to-text task type is not available: ' . $exception->getMessage());
+ 		$available = false;
+ 	}
+ 	return $available;
+ }
+
+ /**
+  * Build the list of mimetypes that are currently eligible for indexing.
+  *
+  * @param bool $includingTextual Whether Application::MIMETYPES is part of the result
+  * @return list<string> Application::MIMETYPES (if requested), followed by the image types
+  *                      when the OCR check passes and the audio types when the
+  *                      speech-to-text check passes
+  */
+ public function getMultimodalMimetypes(bool $includingTextual = true): array {
+ 	$textual = $includingTextual ? Application::MIMETYPES : [];
+ 	// Both availability checks always run, OCR first, whatever $includingTextual is.
+ 	$images = $this->isOcrTaskTypeAvailable() ? Application::IMAGE_MIMETYPES : [];
+ 	$audio = $this->isSpeechToTextTaskTypeAvailable() ? Application::AUDIO_MIMETYPES : [];
+
+ 	return [...$textual, ...$images, ...$audio];
+ }
+}