Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 68 additions & 0 deletions BACKEND/app/auth.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
from fastapi import HTTPException, status, Request
from fastapi.security import OAuth2PasswordBearer
from functools import wraps
import requests

oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token")

GITEA_API_URL = "http://localhost:3000/api/v1"

def validate_token(func):
    """Decorate an async endpoint so it only runs for a valid Gitea token.

    The wrapped coroutine must receive the incoming ``Request`` as one of its
    positional or keyword arguments. The ``Authorization: Bearer <token>``
    header is read from it and the token is checked by calling
    ``GET {GITEA_API_URL}/user``. If the endpoint was called with a
    ``user_info`` keyword argument, it is overwritten with the JSON body
    returned by Gitea before the endpoint is invoked.

    Args:
        func: The async endpoint function to protect.

    Returns:
        An async wrapper with the same call signature as ``func``.

    Raises:
        HTTPException: 500 if no ``Request`` argument is present; 401 if the
            header is missing or malformed, if Gitea answers with anything
            other than 200, if Gitea cannot be reached, or if its reply is
            not valid JSON.
    """
    # Upper bound for the Gitea round-trip so a stalled auth server cannot
    # hang the request forever.
    gitea_timeout_seconds = 10

    @wraps(func)
    async def wrapper(*args, **kwargs):
        # Positional arguments are searched first, then keyword values.
        request = next(
            (
                candidate
                for candidate in (*args, *kwargs.values())
                if isinstance(candidate, Request)
            ),
            None,
        )
        if request is None:
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail="Request object not found",
            )

        auth_header = request.headers.get("Authorization")
        if not auth_header or not auth_header.startswith("Bearer "):
            raise HTTPException(
                status_code=status.HTTP_401_UNAUTHORIZED,
                detail="Missing or invalid authorization header"
            )
        # Strip only the leading scheme; str.replace() would also delete any
        # later "Bearer " occurrence inside the credential itself.
        token = auth_header[len("Bearer "):]

        try:
            # NOTE(review): the credential is forwarded verbatim, without a
            # scheme prefix - confirm this is the format Gitea expects.
            # NOTE(review): requests.get is synchronous and is called inside
            # an async function, so it blocks while waiting for Gitea.
            response = requests.get(
                f"{GITEA_API_URL}/user",
                headers={"Authorization": token},
                timeout=gitea_timeout_seconds,
            )
        except requests.RequestException as exc:
            raise HTTPException(
                status_code=status.HTTP_401_UNAUTHORIZED,
                detail="Invalid authentication credentials"
            ) from exc

        if response.status_code == 401:
            raise HTTPException(
                status_code=status.HTTP_401_UNAUTHORIZED,
                detail="Token expired or invalid"
            )
        if response.status_code != 200:
            raise HTTPException(
                status_code=status.HTTP_401_UNAUTHORIZED,
                detail="Authentication failed"
            )

        if "user_info" in kwargs:
            try:
                kwargs["user_info"] = response.json()
            except ValueError as exc:
                raise HTTPException(
                    status_code=status.HTTP_401_UNAUTHORIZED,
                    detail="Invalid authentication credentials"
                ) from exc

        # Called outside every try block on purpose: errors raised by the
        # endpoint itself must propagate unchanged instead of being reported
        # as an authentication failure.
        return await func(*args, **kwargs)

    return wrapper
12 changes: 8 additions & 4 deletions BACKEND/app/crud.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,15 @@ def parse_usfm_to_csv(book_name, usfm_content, project_id):
""" Convert USFM content to CSV format and return extracted data """
try:
my_parser = USFMParser(usfm_content) # Initialize parser
output = my_parser.to_list(include_markers=Filter.BCV + Filter.TEXT) # Extract BCV and Text
output = my_parser.to_list(include_markers=Filter.BCV + Filter.TEXT) # Extract BCV and Text
processed_output = [
[re.sub(r"\s+", " ", value).strip() if isinstance(value, str) else value for value in row]
for row in output
]
[normalize_text(value).replace("\n", " ") if isinstance(value, str) else value for value in row]
for row in output]
# processed_output = [
# [re.sub(r"\s+", " ", value).strip() if isinstance(value, str) else value for value in row]
# for row in output
# ]

if not processed_output:
logging.error(f"No data extracted for {book_name}!")
else:
Expand Down
83 changes: 57 additions & 26 deletions BACKEND/app/router.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import itertools
from fastapi import APIRouter, HTTPException,File,UploadFile,Query
from fastapi import APIRouter, HTTPException,File,UploadFile,Query, Request, Depends
from fastapi import Body
from pydantic import BaseModel
from database import SessionLocal
Expand All @@ -14,6 +14,8 @@
import crud
from fastapi.responses import JSONResponse
import base64
from auth import validate_token
from utils import input_token



Expand All @@ -35,10 +37,11 @@ class USFMUploadRequest(BaseModel):


@router.post("/add_project/")
async def add_project(request: ProjectRequest):
@validate_token
async def add_project(request: Request, project_request: ProjectRequest, user_details=Depends(input_token)):
""" Add a new project and return the project ID """
session = SessionLocal()
project_name = request.project_name.strip()
project_name = project_request.project_name.strip()

# Validate project name (Ensure it's not empty after trimming)
if not project_name:
Expand All @@ -47,7 +50,7 @@ async def add_project(request: ProjectRequest):

# Check if the project already exists
existing_project = session.query(Project).filter_by(
project_name=request.project_name
project_name=project_request.project_name
).first()

if existing_project:
Expand All @@ -56,7 +59,7 @@ async def add_project(request: ProjectRequest):

# Insert new project
new_project = Project(
project_name=request.project_name
project_name=project_request.project_name
)
session.add(new_project)
session.commit()
Expand All @@ -69,13 +72,19 @@ async def add_project(request: ProjectRequest):


@router.get("/list_projects/")
async def list_projects(project_name: str = Query(None)):
@validate_token
async def list_projects(request: Request, project_name: str = Query(None),
user_details=Depends(input_token)):
"""
List all projects or fetch a specific project by project_name.
If project_name is provided, returns the matching project or null if not found.
"""
session = SessionLocal()
try:

# username = user_info.get("login")
# print(f"User {username} is fetching projects")

if project_name:
project = session.query(Project).filter(Project.project_name == project_name).first()
return {"project": project if project else None}
Expand All @@ -95,8 +104,9 @@ async def list_projects(project_name: str = Query(None)):


@router.post("/upload_usfm/")
async def upload_usfm(
request: USFMUploadRequest
@validate_token
async def upload_usfm(request: Request,
project_request: USFMUploadRequest, user_details=Depends(input_token)
):
"""
Upload a USFM content as a string, process it, and store data in DB.
Expand All @@ -106,9 +116,9 @@ async def upload_usfm(

try:
# Get project_id from project_name
project_name = request.project_name
usfm_sha = request.usfm_sha
encoded_usfm = request.encoded_usfm
project_name = project_request.project_name
usfm_sha = project_request.usfm_sha
encoded_usfm = project_request.encoded_usfm
project_id = crud.get_project_id(session,project_name)
logging.info(f"Processing USFM file for project: {project_name} (Project ID: {project_id})")

Expand Down Expand Up @@ -200,25 +210,27 @@ async def upload_usfm(


@router.put("/update_usfm/")
async def update_usfm(
request: USFMUploadRequest
@validate_token
async def update_usfm(request: Request,
project_request: USFMUploadRequest, user_details=Depends(input_token)
):
""" Update an existing USFM file, reprocess it, and update both book and verse tables properly. """
session = SessionLocal()

try:
# Extract values from request body
project_name = request.project_name
usfm_sha = request.usfm_sha
encoded_usfm = request.encoded_usfm
project_name = project_request.project_name
usfm_sha = project_request.usfm_sha
encoded_usfm = project_request.encoded_usfm
# Extract book name from USFM
project_id = crud.get_project_id(session,project_name)

logging.info(f"Updating USFM file for project: {project_name} (Project ID: {project_id})")
try:
usfm_bytes = base64.b64decode(encoded_usfm)
usfm = usfm_bytes.decode("utf-8") # Convert bytes to string
usfm=crud.normalize_text(usfm)
#errors are not displaying properly
# usfm=crud.normalize_text(usfm)
except Exception as e:
logging.error(f"Failed to decode USFM content: {str(e)}")
raise HTTPException(status_code=400, detail="Invalid encoded USFM content")
Expand Down Expand Up @@ -289,7 +301,9 @@ async def update_usfm(


@router.get("/list_books/")
async def list_books(project_name: str = Query(None)):
@validate_token
async def list_books(request: Request,project_name: str = Query(None),
user_details=Depends(input_token)):
""" Retrieve all Bibles (projects) along with their books and their status, optionally filtering by project name """
session = SessionLocal()
try:
Expand Down Expand Up @@ -328,7 +342,9 @@ async def list_books(project_name: str = Query(None)):


@router.get("/find_missing_verses/")
async def find_missing_verses(book_name: str, project_name: str):
@validate_token
async def find_missing_verses(request: Request,book_name: str, project_name: str,
user_details=Depends(input_token)):
"""Find missing verses for a given book_id and project_id by comparing with versification.json."""

session = SessionLocal()
Expand Down Expand Up @@ -400,7 +416,9 @@ async def find_missing_verses(book_name: str, project_name: str):


@router.get("/book/usfm/")
async def get_book_usfm(project_name: str, book_name: str):
@validate_token
async def get_book_usfm(request: Request,project_name: str, book_name: str,
user_details=Depends(input_token)):
"""
Get the USFM content of a book from the database.
"""
Expand Down Expand Up @@ -434,7 +452,9 @@ async def get_book_usfm(project_name: str, book_name: str):


@router.get("/book/json/")
async def get_book_json(project_name: str, book_name: str):
@validate_token
async def get_book_json(request: Request,project_name: str, book_name: str,
user_details=Depends(input_token)):
"""
Get the book's content in JSON format.
"""
Expand Down Expand Up @@ -476,7 +496,9 @@ async def get_book_json(project_name: str, book_name: str):


@router.get("/chapter/json/")
async def get_chapter_json(project_name: str, book_name: str, chapter: int):
@validate_token
async def get_chapter_json(request: Request,project_name: str, book_name: str,
chapter: int, user_details=Depends(input_token)):
"""
Get the chapter's content in JSON format.
"""
Expand Down Expand Up @@ -518,8 +540,10 @@ async def get_chapter_json(project_name: str, book_name: str, chapter: int):


@router.get("/book/chapters/")
@validate_token
# async def get_book_chapters(book_id: int):
async def get_book_chapters(project_name: str, book_name: str):
async def get_book_chapters(request: Request, project_name: str, book_name: str,
user_details=Depends(input_token)):
"""
Get the list of chapters available in a book.
"""
Expand Down Expand Up @@ -554,10 +578,13 @@ async def get_book_chapters(project_name: str, book_name: str):


@router.get("/parallel_corpora/withbcv/")
@validate_token
async def get_parallel_corpora_withbcv(
request: Request,
project_name_1: str,
project_name_2: str,
response_type: str = Query("csv", description="Set 'json' for JSON response, 'csv' for file download")
response_type: str = Query("csv", description="Set 'json' for JSON response, 'csv' for file download"),
user_details=Depends(input_token)
):
"""
Generate and return the parallel corpus between two projects (two languages) in CSV or JSON format.
Expand Down Expand Up @@ -675,8 +702,12 @@ async def get_parallel_corpora_withbcv(


@router.get("/parallel_corpora/withoutbcv/")
async def get_parallel_corpora_texts(project_name_1: str, project_name_2: str,
response_type: str = Query("csv", description="Set 'json' for JSON response, 'csv' for file download")):
@validate_token
async def get_parallel_corpora_texts(request: Request,
project_name_1: str, project_name_2: str,
response_type: str = Query("csv",
description="Set 'json' for JSON response, 'csv' for file download"),
user_details=Depends(input_token)):
"""
Generate and return the parallel corpus between two projects in CSV format with only Text_1 and Text_2.
"""
Expand Down
9 changes: 9 additions & 0 deletions BACKEND/app/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from fastapi import Security
from fastapi.security import HTTPBearer

oauth2_scheme = HTTPBearer()

async def input_token(token: str = Security(oauth2_scheme)):
    '''Return the credentials supplied through the bearer security scheme.

    No validation is performed here; the dependency currently exists only so
    that Swagger shows a token input. The value resolved from
    ``oauth2_scheme`` is handed back wrapped in a dict under the ``"token"``
    key.
    '''
    return {"token": token}
3 changes: 2 additions & 1 deletion BACKEND/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@ SQLAlchemy
psycopg2
uvicorn
python-multipart
sacremoses
sacremoses
requests
2 changes: 2 additions & 0 deletions UI/.dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
node_modules
.env.local
15 changes: 15 additions & 0 deletions UI/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Image for the UI: installs dependencies, builds the project, then starts
# it through the package's npm scripts.
FROM node:18-alpine

# All following instructions run relative to /app inside the image.
WORKDIR /app

# Copy only the package manifests first so the dependency-install layer
# below can be reused from cache when just the application source changes.
COPY package*.json ./

RUN npm install

# Copy the rest of the build context (paths listed in .dockerignore, such as
# node_modules and .env.local, are left out).
COPY . .

RUN npm run build

# EXPOSE only documents the port; it does not publish it.
# NOTE(review): confirm the server started below actually listens on 3001.
EXPOSE 3001

# NOTE(review): this starts the "dev" script even though a build was produced
# above - confirm running the dev server in the container is intended.
# The arguments after "--" are passed through to the dev script so it binds
# to all interfaces.
CMD ["npm", "run", "dev", "--", "--host", "0.0.0.0"]
Loading