FunAudioLLM · muhammad-ahmed-ghani · Mar 18, 2026
diff --git a/README.md b/README.md
@@ -37,7 +37,7 @@ Fun-CineForge relies on Conda and Python environments. Execute **setup.py** to a
 
 ```shell
 # Conda
-git clone git@github.com:FunAudioLLM/FunCineForge.git
+git clone https://github.com/FunAudioLLM/FunCineForge.git
 conda create -n FunCineForge python=3.10 -y && conda activate FunCineForge
 sudo apt-get install ffmpeg
 # Initial settings
@@ -111,6 +111,15 @@ cd exps
 bash infer.sh
 ```
 
+### Interactive Demo
+Want to try it out interactively? Launch the Gradio demo for a web-based interface:
+
+```shell
+python gradio_demo.py
+```
+
+The demo will start at `http://localhost:7860`, where you can upload your own videos and reference audio, customize voice prompts, and generate dubbed videos on the fly.
+
 The API for multi-speaker dubbing from raw videos and SRT scripts is under development ...
 
 <a name="Recent-Updates"></a>

diff --git a/README_zh.md b/README_zh.md
@@ -36,7 +36,7 @@ Fun-CineForge 依赖 Conda 和 Python 环境。执行 **setup.py** 自动安装
 
 ```shell
 # Conda
-git clone git@github.com:FunAudioLLM/FunCineForge.git
+git clone https://github.com/FunAudioLLM/FunCineForge.git
 conda create -n FunCineForge python=3.10 -y && conda activate FunCineForge
 sudo apt-get install ffmpeg
 # 初始化设置
@@ -109,6 +109,15 @@ cd exps
 bash infer.sh
 ```
 
+### 交互式演示
+想要交互式体验吗？启动 Gradio 演示程序，使用基于网页的界面：
+
+```shell
+python gradio_demo.py
+```
+
+演示程序将在 `http://localhost:7860` 启动，您可以上传自己的视频和参考音频、自定义语音提示，并即时生成配音视频。
+
 从原始视频和 SRT 脚本进行多人配音的 API 调用接口在开发中 ... 
 
 <a name="近期更新"></a>

diff --git a/funcineforge/models/causal_hifigan.py b/funcineforge/models/causal_hifigan.py
@@ -16,6 +16,7 @@
 from torch.nn.utils.parametrize import remove_parametrizations
 from torch.nn.utils.parametrizations import weight_norm
 import logging
+import soundfile as sf
 from funcineforge.register import tables
 from funcineforge.utils.device_funcs import to_device
 import os
@@ -828,9 +829,11 @@ def inference(
                     new_freq=output_sr
                 )
                 wav_sr = output_sr
-            torchaudio.save(
-                os.path.join(wav_out_dir, f"{key[0]}.wav"), recon_speech.cpu(),
-                sample_rate=wav_sr, encoding='PCM_S', bits_per_sample=16
+            sf.write(
+                os.path.join(wav_out_dir, f"{key[0]}.wav"),
+                recon_speech.cpu().squeeze(0).numpy(),
+                samplerate=wav_sr,
+                subtype='PCM_16'
             )
 
         return recon_speech
diff --git a/funcineforge/models/inference_model.py b/funcineforge/models/inference_model.py
@@ -4,6 +4,7 @@
 import numpy as np
 import os
 import torchaudio
+import soundfile as sf
 import time
 import shutil
 from funcineforge.register import tables
@@ -70,9 +71,11 @@ def inference(
                 wav_out_dir = os.path.join(output_dir, "wav")
                 os.makedirs(wav_out_dir, exist_ok=True)
                 output_wav_path = os.path.join(wav_out_dir, f"{key[0]}.wav")
-                torchaudio.save(
-                    output_wav_path, wav.cpu(),
-                    sample_rate=self.sample_rate, encoding='PCM_S', bits_per_sample=16
+                sf.write(
+                    output_wav_path,
+                    wav.cpu().squeeze(0).numpy(),
+                    samplerate=self.sample_rate,
+                    subtype='PCM_16'
                 )
 
                 silent_video_path = data_in[0]["video"]