diff --git a/appinfo/info.xml b/appinfo/info.xml index 9394a0a..744f35e 100644 --- a/appinfo/info.xml +++ b/appinfo/info.xml @@ -44,7 +44,6 @@ Refer to the [Context Chat Backend's readme](https://github.com/nextcloud/contex OCA\ContextChat\BackgroundJobs\FileSystemListenerJob - OCA\ContextChat\BackgroundJobs\ActionJob OCA\ContextChat\BackgroundJobs\RotateLogsJob diff --git a/lib/AppInfo/Application.php b/lib/AppInfo/Application.php index 0ea6b81..24e87cb 100644 --- a/lib/AppInfo/Application.php +++ b/lib/AppInfo/Application.php @@ -61,6 +61,35 @@ class Application extends App implements IBootstrap { 'text/org', ]; + public const IMAGE_MIMETYPES = [ + 'image/bmp', + 'image/bpg', + 'image/emf', + 'image/gif', + 'image/heic', + 'image/heif', + 'image/jp2', + 'image/jpeg', + 'image/png', + 'image/svg+xml', + 'image/tga', + 'image/tiff', + 'image/webp', + 'image/x-dcraw', + 'image/x-icon', + ]; + + public const AUDIO_MIMETYPES = [ + 'audio/aac', + 'audio/flac', + 'audio/mp4', + 'audio/mpeg', + 'audio/ogg', + 'audio/wav', + 'audio/webm', + 'audio/x-scpls', + ]; + public function __construct(array $urlParams = []) { parent::__construct(self::APP_ID, $urlParams); } diff --git a/lib/BackgroundJobs/StorageCrawlJob.php b/lib/BackgroundJobs/StorageCrawlJob.php index 1447fed..9436e40 100644 --- a/lib/BackgroundJobs/StorageCrawlJob.php +++ b/lib/BackgroundJobs/StorageCrawlJob.php @@ -10,11 +10,13 @@ namespace OCA\ContextChat\BackgroundJobs; +use OCA\ContextChat\AppInfo\Application; use OCA\ContextChat\Db\QueueFile; use OCA\ContextChat\Logger; use OCA\ContextChat\Service\DiagnosticService; use OCA\ContextChat\Service\QueueService; use OCA\ContextChat\Service\StorageService; +use OCA\ContextChat\Service\TaskTypeService; use OCP\AppFramework\Services\IAppConfig; use OCP\AppFramework\Utility\ITimeFactory; use OCP\BackgroundJob\IJobList; @@ -33,12 +35,13 @@ public function __construct( private StorageService $storageService, private DiagnosticService $diagnosticService, private 
IAppConfig $appConfig, + private TaskTypeService $taskTypeService, ) { parent::__construct($timeFactory); } /** - * @param array{storage_id:int, root_id:int, overridden_root:int|null, override_root:int|null, last_file_id:int} $argument + * @param array{storage_id:int, root_id:int, overridden_root:int|null, override_root:int|null, last_file_id:int, only_non_textual?:bool} $argument * @return void */ protected function run($argument): void { @@ -46,6 +49,8 @@ protected function run($argument): void { $rootId = $argument['root_id']; $overrideRoot = $argument['overridden_root'] ?? $argument['override_root'] ?? $rootId; $lastFileId = $argument['last_file_id']; + $onlyNonTextual = $argument['only_non_textual'] ?? false; + $mimeTypes = $this->taskTypeService->getMultimodalMimetypes(!$onlyNonTextual); // Remove current iteration $this->jobList->remove(self::class, $argument); @@ -56,7 +61,7 @@ protected function run($argument): void { $mountFilesCount = 0; $lastSuccessfulFileId = -1; - foreach ($this->storageService->getFilesInMount($storageId, $overrideRoot ?? $rootId, $lastFileId, self::BATCH_SIZE) as $fileId) { + foreach ($this->storageService->getFilesInMount($storageId, $overrideRoot ?? 
$rootId, $lastFileId, self::BATCH_SIZE, $mimeTypes) as $fileId) { $queueFile = new QueueFile(); $queueFile->setStorageId($storageId); $queueFile->setRootId($rootId); diff --git a/lib/Listener/ShareListener.php b/lib/Listener/ShareListener.php index 528bd4f..6f76805 100644 --- a/lib/Listener/ShareListener.php +++ b/lib/Listener/ShareListener.php @@ -10,12 +10,12 @@ namespace OCA\ContextChat\Listener; -use OCA\ContextChat\AppInfo\Application; use OCA\ContextChat\Logger; use OCA\ContextChat\Public\UpdateAccessOp; use OCA\ContextChat\Service\ActionScheduler; use OCA\ContextChat\Service\ProviderConfigService; use OCA\ContextChat\Service\StorageService; +use OCA\ContextChat\Service\TaskTypeService; use OCP\EventDispatcher\Event; use OCP\EventDispatcher\IEventListener; use OCP\Files\FileInfo; @@ -37,6 +37,7 @@ public function __construct( private IManager $shareManager, private ActionScheduler $actionService, private IGroupManager $groupManager, + private TaskTypeService $taskTypeService, ) { } @@ -145,6 +146,7 @@ public function handle(Event $event): void { private function allowedMimeType(Node $file): bool { $mimeType = $file->getMimeType(); - return in_array($mimeType, Application::MIMETYPES, true); + $mimeTypes = $this->taskTypeService->getMultimodalMimetypes(); + return in_array($mimeType, $mimeTypes, true); } } diff --git a/lib/Migration/Version006000000Date20260316135634.php b/lib/Migration/Version006000000Date20260316135634.php new file mode 100644 index 0000000..5ed9c91 --- /dev/null +++ b/lib/Migration/Version006000000Date20260316135634.php @@ -0,0 +1,68 @@ +taskTypeService->isOcrTaskTypeAvailable()) { + $output->warning('[Context Chat] OCR task type is not available, image files will not be indexed.'); + } + if (!$this->taskTypeService->isSpeechToTextTaskTypeAvailable()) { + $output->warning('[Context Chat] Speech-to-text task type is not available, audio files will not be indexed.'); + } + + try { + foreach ($this->storageService->getMounts() as $mount) { + 
$this->logger->debug('Scheduling StorageCrawlJob storage_id=' . $mount['storage_id'] . ' root_id=' . $mount['root_id' ] . ' override_root=' . $mount['overridden_root']); + $this->jobList->add(StorageCrawlJob::class, [ + 'storage_id' => $mount['storage_id'], + 'root_id' => $mount['root_id' ], + 'overridden_root' => $mount['overridden_root'], + 'last_file_id' => 0, + 'only_non_textual' => true, + ]); + } + } catch (\Exception $e) { + $this->logger->error('Failed to schedule StorageCrawlJob to find files for indexation.', ['exception' => $e]); + $output->warning('Failed to schedule StorageCrawlJob to find files for indexation: ' . $e->getMessage()); + return; + } + + $output->info('Multimodal files have been scheduled to be queued for indexation.'); + } +} diff --git a/lib/Service/FsEventService.php b/lib/Service/FsEventService.php index 58adb0f..c6bc432 100644 --- a/lib/Service/FsEventService.php +++ b/lib/Service/FsEventService.php @@ -7,13 +7,11 @@ namespace OCA\ContextChat\Service; -use OCA\ContextChat\AppInfo\Application; use OCA\ContextChat\Db\QueueFile; use OCA\ContextChat\Logger; use OCP\DB\Exception; use OCP\Files\Folder; use OCP\Files\InvalidPathException; -use OCP\Files\IRootFolder; use OCP\Files\Node; use OCP\Files\NotFoundException; @@ -24,7 +22,7 @@ public function __construct( private QueueService $queue, private ActionScheduler $actionService, private StorageService $storageService, - private IRootFolder $rootFolder, + private TaskTypeService $taskTypeService, ) { } @@ -134,7 +132,8 @@ public function onInsert(Node $node, bool $recurse = true, bool $update = false) private function allowedMimeType(Node $file): bool { $mimeType = $file->getMimeType(); - return in_array($mimeType, Application::MIMETYPES, true); + $mimeTypes = $this->taskTypeService->getMultimodalMimetypes(); + return in_array($mimeType, $mimeTypes, true); } private function allowedPath(Node $file): bool { diff --git a/lib/Service/StorageService.php b/lib/Service/StorageService.php index
b937259..7709bea 100644 --- a/lib/Service/StorageService.php +++ b/lib/Service/StorageService.php @@ -20,7 +20,6 @@ use OCP\Files\Config\IUserMountCache; use OCP\Files\Folder; use OCP\Files\IMimeTypeLoader; -use OCP\Files\IRootFolder; use OCP\Files\Node; use OCP\FilesMetadata\IFilesMetadataManager; use OCP\IDBConnection; @@ -44,8 +43,8 @@ public function __construct( private IMimeTypeLoader $mimeTypes, private IUserMountCache $userMountCache, private IFilesMetadataManager $metadataManager, - private IRootFolder $rootFolder, private IFileAccess $fileAccess, + private TaskTypeService $taskTypeService, ) { } @@ -84,7 +83,8 @@ public function countFilesInMount(int $storageId, int $rootId): int { return 0; } - $mimeTypes = array_map(fn ($mimeType) => $this->mimeTypes->getId($mimeType), Application::MIMETYPES); + $mimeTypes = $this->taskTypeService->getMultimodalMimetypes(); + $mimeTypesIds = array_map(fn ($mimeType) => $this->mimeTypes->getId($mimeType), $mimeTypes); $qb = $this->getCacheQueryBuilder(); @@ -110,7 +110,7 @@ public function countFilesInMount(int $storageId, int $rootId): int { ->andWhere($qb->expr()->notLike('filecache.path', $qb->createNamedParameter('files_versions/%'))) ->andWhere($qb->expr()->notLike('filecache.path', $qb->createNamedParameter('files_trashbin/%'))) ->andWhere($qb->expr()->eq('filecache.storage', $qb->createNamedParameter($storageId))) - ->andWhere($qb->expr()->in('filecache.mimetype', $qb->createNamedParameter($mimeTypes, IQueryBuilder::PARAM_INT_ARRAY))) + ->andWhere($qb->expr()->in('filecache.mimetype', $qb->createNamedParameter($mimeTypesIds, IQueryBuilder::PARAM_INT_ARRAY))) ->andWhere($qb->expr()->lte('filecache.size', $qb->createNamedParameter(Application::CC_MAX_SIZE, IQueryBuilder::PARAM_INT))) ->andWhere($qb->expr()->gt('filecache.size', $qb->createNamedParameter(0, IQueryBuilder::PARAM_INT))); $result = $qb->executeQuery(); @@ -199,14 +199,29 @@ private function getMountsOld(): \Generator { * @param int $rootId * @param int
$lastFileId * @param int $maxResults + * @param string[]|null $mimeTypes Mimetypes to list; null selects every currently indexable mimetype, an empty list yields no files * @return \Generator */ - public function getFilesInMount(int $storageId, int $rootId, int $lastFileId = 0, int $maxResults = 100): \Generator { + public function getFilesInMount( + int $storageId, + int $rootId, + int $lastFileId = 0, + int $maxResults = 100, + ?array $mimeTypes = null, + ): \Generator { + // null: no explicit filter, fall back to every mimetype that can currently be indexed + $mimeTypes ??= $this->taskTypeService->getMultimodalMimetypes(); + if ($mimeTypes === []) { + // An explicitly empty list must yield nothing instead of widening to all mimetypes + return (static function (): \Generator { + yield from []; + })(); + } if (!$this->isFileAccessAvailable()) { - return $this->getFilesInMountOld($storageId, $rootId, $lastFileId, $maxResults); + return $this->getFilesInMountOld($storageId, $rootId, $lastFileId, $maxResults, $mimeTypes); } - return $this->getFilesInMountUsingFileAccess($storageId, $rootId, $lastFileId, $maxResults); + return $this->getFilesInMountUsingFileAccess($storageId, $rootId, $lastFileId, $maxResults, $mimeTypes); } /** @@ -214,10 +229,17 @@ public function getFilesInMount(int $storageId, int $rootId, int $lastFileId = 0 * @param int $rootId * @param int $lastFileId * @param int $maxResults + * @param list $mimeTypes * @return \Generator */ - private function getFilesInMountUsingFileAccess(int $storageId, int $rootId, int $lastFileId = 0, int $maxResults = 100): \Generator { - $mimeTypeIds = array_map(fn ($mimeType) => $this->mimeTypes->getId($mimeType), Application::MIMETYPES); + private function getFilesInMountUsingFileAccess( + int $storageId, + int $rootId, + int $lastFileId = 0, + int $maxResults = 100, + array $mimeTypes = Application::MIMETYPES, + ): \Generator { + $mimeTypeIds = array_map(fn ($mimeType) => $this->mimeTypes->getId($mimeType), $mimeTypes); foreach ($this->fileAccess->getByAncestorInStorage($storageId, $rootId, $lastFileId, $maxResults, $mimeTypeIds, false, true) as $cacheEntry) { yield $cacheEntry['fileid']; } @@ -228,9 +250,16 @@ private function getFilesInMountUsingFileAccess(int $storageId, int $rootId, int * @param int $rootId * @param int $lastFileId * @param int $maxResults + *
@param list $mimeTypes * @return \Generator */ - private function getFilesInMountOld(int $storageId, int $rootId, int $lastFileId = 0, int $maxResults = 100): \Generator { + private function getFilesInMountOld( + int $storageId, + int $rootId, + int $lastFileId = 0, + int $maxResults = 100, + array $mimeTypes = Application::MIMETYPES, + ): \Generator { $qb = $this->getCacheQueryBuilder(); try { $qb->selectFileCache(); @@ -249,7 +278,7 @@ private function getFilesInMountOld(int $storageId, int $rootId, int $lastFileId return; } - $mimeTypes = array_map(fn ($mimeType) => $this->mimeTypes->getId($mimeType), Application::MIMETYPES); + $mimeTypesIds = array_map(fn ($mimeType) => $this->mimeTypes->getId($mimeType), $mimeTypes); $qb = $this->getCacheQueryBuilder(); @@ -272,7 +301,7 @@ private function getFilesInMountOld(int $storageId, int $rootId, int $lastFileId ->andWhere($qb->expr()->like('filecache.path', $qb->createNamedParameter($path . '%'))) ->andWhere($qb->expr()->eq('filecache.storage', $qb->createNamedParameter($storageId))) ->andWhere($qb->expr()->gt('filecache.fileid', $qb->createNamedParameter($lastFileId))) - ->andWhere($qb->expr()->in('filecache.mimetype', $qb->createNamedParameter($mimeTypes, IQueryBuilder::PARAM_INT_ARRAY))); + ->andWhere($qb->expr()->in('filecache.mimetype', $qb->createNamedParameter($mimeTypesIds, IQueryBuilder::PARAM_INT_ARRAY))); if ($maxResults !== 0) { $qb->setMaxResults($maxResults); diff --git a/lib/Service/TaskTypeService.php b/lib/Service/TaskTypeService.php new file mode 100644 index 0000000..cbe0bc5 --- /dev/null +++ b/lib/Service/TaskTypeService.php @@ -0,0 +1,59 @@ +taskProcessingManager->getPreferredProvider(self::OCR_TASK_TYPE); + return true; + } catch (\Exception $e) { + $this->logger->debug('OCR task type is not available: ' .
$e->getMessage()); + return false; + } + } + + public function isSpeechToTextTaskTypeAvailable(): bool { + try { + $this->taskProcessingManager->getPreferredProvider(self::SPEECH_TO_TEXT_TASK_TYPE); + return true; + } catch (\Exception $e) { + $this->logger->debug('Speech-to-text task type is not available: ' . $e->getMessage()); + return false; + } + } + + /** + * @return list + */ + public function getMultimodalMimetypes(bool $includingTextual = true): array { + $imagesEnabled = $this->isOcrTaskTypeAvailable(); + $audioEnabled = $this->isSpeechToTextTaskTypeAvailable(); + return array_merge( + $includingTextual ? Application::MIMETYPES : [], + $imagesEnabled ? Application::IMAGE_MIMETYPES : [], + $audioEnabled ? Application::AUDIO_MIMETYPES : [] + ); + } +}