diff --git a/amber/src/main/scala/org/apache/texera/web/TexeraWebApplication.scala b/amber/src/main/scala/org/apache/texera/web/TexeraWebApplication.scala index 4264a9ca180..8e81f465fdc 100644 --- a/amber/src/main/scala/org/apache/texera/web/TexeraWebApplication.scala +++ b/amber/src/main/scala/org/apache/texera/web/TexeraWebApplication.scala @@ -27,6 +27,7 @@ import io.dropwizard.setup.{Bootstrap, Environment} import io.dropwizard.websockets.WebsocketBundle import org.apache.texera.amber.config.StorageConfig import org.apache.texera.amber.engine.common.Utils +import org.apache.texera.service.util.LargeBinaryManager import org.apache.texera.amber.util.ObjectMapperUtils import org.apache.texera.auth.SessionUser import org.apache.texera.dao.SqlServer @@ -104,6 +105,9 @@ class TexeraWebApplication StorageConfig.jdbcPassword ) + // ensure the large-binary S3 bucket exists before any workflow execution attempts to use it + LargeBinaryManager.initialize() + // redirect all 404 to index page, according to Angular routing requirements val eph = new ErrorPageErrorHandler eph.addErrorPage(404, "/") diff --git a/common/workflow-core/src/main/scala/org/apache/texera/service/util/LargeBinaryManager.scala b/common/workflow-core/src/main/scala/org/apache/texera/service/util/LargeBinaryManager.scala index 211d7d3b757..7886328fd56 100644 --- a/common/workflow-core/src/main/scala/org/apache/texera/service/util/LargeBinaryManager.scala +++ b/common/workflow-core/src/main/scala/org/apache/texera/service/util/LargeBinaryManager.scala @@ -32,6 +32,13 @@ import java.util.UUID object LargeBinaryManager extends LazyLogging { private val DEFAULT_BUCKET = "texera-large-binaries" + /** + * Ensures the large-binary bucket exists. Should be called once at service startup. + */ + def initialize(): Unit = { + S3StorageClient.createBucketIfNotExist(DEFAULT_BUCKET) + } + /** * Creates a new LargeBinary reference. * The actual data upload happens separately via LargeBinaryOutputStream. @@ -39,8 +46,6 @@ object LargeBinaryManager extends LazyLogging { * @return S3 URI string for the new LargeBinary (format: s3://bucket/key) */ def create(): String = { - S3StorageClient.createBucketIfNotExist(DEFAULT_BUCKET) - val objectKey = s"objects/${System.currentTimeMillis()}/${UUID.randomUUID()}" val uri = s"s3://$DEFAULT_BUCKET/$objectKey"