Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
238 changes: 238 additions & 0 deletions .hyf/grader_lib.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,238 @@
#!/usr/bin/env bash
# grader_lib.sh — shared helpers for HYF Data Track autograders.
# Source this at the top of test.sh:
# source "$(dirname "$0")/grader_lib.sh"
#
# Design rule: every check_* function always returns 0.
# Results are communicated via pass()/fail()/warn() into _grader_details,
# never via exit code. This makes bare calls safe under `set -euo pipefail`.

# Ordered list of result lines collected while the grader runs.
_grader_details=()

# Recorders: each appends exactly one line to _grader_details, prefixed with
# a status glyph and label. Nothing is printed at call time.

# Record a successful check. $1 — message text.
pass() {
  local msg="$1"
  _grader_details+=("✓ PASS ${msg}")
}

# Record a failed check. $1 — message text.
fail() {
  local msg="$1"
  _grader_details+=("✗ FAIL ${msg}")
}

# Record a non-fatal finding. $1 — message text.
warn() {
  local msg="$1"
  _grader_details+=("⚠ WARN ${msg}")
}

print_results() {
  # Usage: print_results [<header>]
  # Prints a blank line, a "=== <header> ===" banner, every line recorded in
  # _grader_details (each prefixed with one space), then a closing blank line.
  # <header> defaults to "Autograder Results".
  local header="${1:-Autograder Results}"
  # Keep the loop variable local: this file is sourced, so an undeclared
  # `line` would overwrite a global of the same name in the calling script.
  local line
  printf '\n=== %s ===\n' "$header"
  # The ${arr[@]+...} guard expands to nothing when no results were recorded;
  # a plain "${arr[@]}" on an empty array aborts under `set -u` on bash < 4.4.
  for line in ${_grader_details[@]+"${_grader_details[@]}"}; do
    printf ' %s\n' "$line"
  done
  printf '\n'
}

write_score() {
  # Usage: write_score <score> <passing> [<outfile>]
  # Writes a JSON object with "score", "pass" and "passingScore" to <outfile>
  # (default: score.json in the directory of this library file) and echoes a
  # one-line summary to stdout.
  # Returns: 0 on success; 1 — without touching <outfile> — when <score> or
  #          <passing> is not a plain integer.
  local score="${1:-}"
  local passing="${2:-}"
  local outfile="${3:-$(dirname "${BASH_SOURCE[0]}")/score.json}"
  local pass_flag="false"
  # JSON-style integer: optional minus sign, no leading zeros.
  local int_re='^-?(0|[1-9][0-9]*)$'

  # Both values are interpolated unquoted into the JSON below and compared
  # numerically. Anything other than an integer would produce a malformed
  # file, and the numeric comparison would evaluate it as an arithmetic
  # expression — so reject it up front.
  if [[ ! "$score" =~ $int_re || ! "$passing" =~ $int_re ]]; then
    printf 'write_score: score and passing must be integers (got "%s" and "%s")\n' \
      "$score" "$passing" >&2
    return 1
  fi

  if (( score >= passing )); then
    pass_flag="true"
  fi

  cat > "$outfile" << JSON
{
"score": $score,
"pass": $pass_flag,
"passingScore": $passing
}
JSON
  echo "Score: $score / 100 (passing: $passing) pass=$pass_flag"
}

# ── Common static-analysis checks ────────────────────────────────────────────
# All check_* functions always return 0. Findings are recorded via
# pass()/fail()/warn() and printed by print_results().

check_no_print_statements() {
  # Usage: check_no_print_statements <dir> [label]
  # Searches *.py files under <dir> for lines that begin (after optional
  # whitespace) with `print(` and do not contain "# noqa". Records a single
  # warning with the number of such lines; records nothing when there are none.
  # Always returns 0.
  local target="${1:-.}"
  local tag="${2:-$target}"
  local hits
  local n

  hits=$(grep -rn --include="*.py" "^[[:space:]]*print(" "$target" 2>/dev/null \
    | grep -v "# noqa" || true)

  if [[ -z "$hits" ]]; then
    return 0
  fi

  # One matching line per output line, so the line count is the finding count.
  n=$(wc -l <<< "$hits" | tr -d ' ')
  warn "$tag: $n print() call(s) found — use logging.info/warning/error instead (see Week 1 Ch1)"
  return 0
}

check_no_notimplemented() {
  # Usage: check_no_notimplemented <dir> [label]
  # Records one failure when any *.py file under <dir> still contains the
  # text "raise NotImplementedError". Records nothing otherwise.
  # Always returns 0.
  local target="${1:-.}"
  local tag="${2:-$target}"

  # Only the presence of a match matters, so let grep stop at the first hit.
  if grep -rq --include="*.py" "raise NotImplementedError" "$target" 2>/dev/null; then
    fail "$tag: raise NotImplementedError still present — remove stubs before submitting"
  fi
  return 0
}

check_no_relative_imports() {
  # Usage: check_no_relative_imports <dir> [label]
  # Flags `from .module import x` in scripts not inside a proper package.
  # Relative imports break the grader: python3 src/cleaner.py fails with
  # "attempted relative import with no known parent package".
  # Records one failure when any *.py file under <dir> has such a line;
  # records nothing otherwise. Always returns 0.
  local dir="${1:-.}"
  local label="${2:-$dir}"

  # Leading whitespace is allowed so imports nested inside a function or a
  # try/except block are caught as well — they fail the same way at runtime.
  if grep -rqE --include="*.py" "^[[:space:]]*from[[:space:]]+\." "$dir" 2>/dev/null; then
    fail "$label: relative import found (from .module) — use absolute: 'from src.module import x'"
  fi
  return 0
}

check_no_logging_in_utils() {
  # Usage: check_no_logging_in_utils <utils_file>
  # Records a warning when <utils_file> (default: task-1/src/utils.py) exists
  # and mentions logging.basicConfig or logging.getLogger. A missing file is
  # silently accepted. Always returns 0.
  local utils_path="${1:-task-1/src/utils.py}"

  if [[ -f "$utils_path" ]] \
    && grep -qE "logging\.basicConfig|logging\.getLogger" "$utils_path"; then
    warn "$utils_path: logging.basicConfig/getLogger found — logging setup belongs in cleaner.py or the entry-point, not in utils"
  fi
  return 0
}

check_gitignore_python() {
  # Usage: check_gitignore_python [<gitignore_path>]
  # Warns when the ignore file (default: .gitignore) is missing, or when it has
  # no active rule for __pycache__, compiled Python files, or .env. Records a
  # single pass when all three are covered. Always returns 0.
  local gi="${1:-.gitignore}"
  if [[ ! -f "$gi" ]]; then
    warn ".gitignore is missing — add one so __pycache__/ and *.pyc are not committed"
    return 0
  fi

  # Look only at active rules: a commented-out "# __pycache__/" ignores nothing.
  local rules
  rules=$(grep -vE '^[[:space:]]*(#|$)' "$gi" || true)

  local ok=true
  if ! grep -q "__pycache__" <<< "$rules"; then
    warn ".gitignore missing __pycache__/ — Python bytecode cache dirs should not be committed"
    ok=false
  fi
  # Accept the literal *.pyc as well as bracket forms that include "c", such as
  # *.py[cod] or *.py[co] — those cover .pyc files too.
  if ! grep -qE '\*\.py(c|\[[a-z]*c[a-z]*\])' <<< "$rules"; then
    warn ".gitignore missing *.pyc — compiled Python files should not be committed"
    ok=false
  fi
  # ".env" at line start, followed by end-of-line or a non-word character
  # (so ".env", ".env*", ".env.local" match but ".environment" does not).
  # Spelled out instead of \b, which is not part of POSIX ERE.
  if ! grep -qE '^\.env([^[:alnum:]_]|$)' <<< "$rules"; then
    warn ".gitignore missing .env — secret files should not be committed"
    ok=false
  fi
  if [[ "$ok" = true ]]; then
    pass ".gitignore correctly excludes __pycache__/, *.pyc, and .env"
  fi
  return 0
}

check_screenshot_is_png() {
  # Usage: check_screenshot_is_png <expected_path>
  # <expected_path> is the .png path the assignment asks for.
  #   - non-empty file at <expected_path>            → pass
  #   - non-empty sibling with .jpg or .jpeg instead → warn
  #   - neither                                      → fail
  # Always returns 0.
  local expected_png="${1:-}"
  local dir
  dir="$(dirname "$expected_png")"
  local base
  base="$(basename "$expected_png" .png)"
  # Declared local so the loop below cannot overwrite an `ext` variable in the
  # script that sourced this library.
  local ext

  if [[ -s "$expected_png" ]]; then
    pass "screenshot is $expected_png (.png format ✓)"
    return 0
  fi
  for ext in jpg jpeg; do
    if [[ -s "$dir/$base.$ext" ]]; then
      warn "screenshot is .$ext but should be .png — rename to $base.png (partial credit still given)"
      return 0
    fi
  done
  fail "screenshot missing: $expected_png not found"
  return 0
}

check_silent_zero_in_except() {
  # Usage: check_silent_zero_in_except <file>
  # Detects: try: x = compute() / except: x = 0 (silent data corruption).
  # Parses <file> with Python's ast module and reports every assignment of a
  # numeric zero that is a direct statement of an except block. Records one
  # warning listing the findings; records nothing when the file is missing,
  # cannot be parsed, or has no finding. Always returns 0.
  local file="${1:-}"
  if [[ ! -f "$file" ]]; then return 0; fi
  local found
  # stderr is discarded on purpose: a missing python3 or a crash in the helper
  # must not abort grading, it just yields no finding.
  found=$(python3 - "$file" 2>/dev/null << 'PY'
import ast
import sys

try:
    # Read bytes so ast.parse can honour a source-encoding declaration.
    with open(sys.argv[1], "rb") as fh:
        tree = ast.parse(fh.read())
except (OSError, SyntaxError, ValueError):
    sys.exit(0)

for node in ast.walk(tree):
    if not isinstance(node, ast.ExceptHandler):
        continue
    for stmt in node.body:
        if not isinstance(stmt, ast.Assign):
            continue
        if not isinstance(stmt.value, ast.Constant):
            continue
        value = stmt.value.value
        # bool is a subclass of int and False == 0, so compare on the exact
        # type: `flag = False` is not a zeroed-out number.
        if type(value) in (int, float) and value == 0:
            print(f"line {stmt.lineno}: '{ast.unparse(stmt)}' — sets field to 0 in except block (silent data corruption)")
PY
  )
  if [[ -n "$found" ]]; then
    # $'\n' is a real newline; "\n" inside double quotes would be stored and
    # later printed as the two characters backslash + n.
    warn "$file: silent 0-assignment in except block — skip the row or raise instead:"$'\n'" $found"
  fi
  return 0
}

check_exception_logged() {
  # Usage: check_exception_logged <dir>
  # Warns when an except block logs a message but omits the exception variable.
  # Walks every *.py file under <dir>; inside each `except ... as <name>:`
  # block it inspects calls to logging.<anything>(...) and print(...). A call
  # is reported unless it references <name>, is logging.exception(...), or
  # passes exc_info=. At most three findings are reported, in one warning.
  # Always returns 0.
  local dir="${1:-.}"
  local found
  # stderr is discarded on purpose: a missing python3 or a crash in the helper
  # must not abort grading, it just yields no finding.
  found=$(python3 - "$dir" 2>/dev/null << 'PY'
import ast
import os
import sys


def mentions_exception(call, exc_var):
    """True when the call carries the caught exception into the output."""
    func = call.func
    # logging.exception() attaches the active exception by itself.
    if isinstance(func, ast.Attribute) and func.attr == "exception":
        return True
    if any(kw.arg == "exc_info" for kw in call.keywords):
        return True
    # Look for the variable as an identifier. A substring test on the source
    # text would "find" a variable named `e` inside `logging.error` itself.
    return any(
        isinstance(n, ast.Name) and n.id == exc_var for n in ast.walk(call)
    )


issues = []
for root, dirs, files in os.walk(sys.argv[1]):
    dirs.sort()  # stable traversal order, so the three reported are repeatable
    for fname in sorted(files):
        if not fname.endswith(".py"):
            continue
        path = os.path.join(root, fname)
        try:
            with open(path, "rb") as fh:
                tree = ast.parse(fh.read())
        except (OSError, SyntaxError, ValueError):
            continue
        for node in ast.walk(tree):
            if not isinstance(node, ast.ExceptHandler):
                continue
            exc_var = node.name
            if not exc_var:
                continue
            for stmt in node.body:
                for call in ast.walk(stmt):
                    if not isinstance(call, ast.Call):
                        continue
                    func = call.func
                    is_log = (isinstance(func, ast.Attribute) and
                              isinstance(func.value, ast.Name) and
                              func.value.id == "logging")
                    is_print = isinstance(func, ast.Name) and func.id == "print"
                    if not (is_log or is_print):
                        continue
                    if not mentions_exception(call, exc_var):
                        issues.append(f"{path}:{call.lineno}: log message doesn't include '{exc_var}' — add it for easier debugging")

for issue in issues[:3]:
    print(issue)
PY
  )
  if [[ -n "$found" ]]; then
    # $'\n' is a real newline; "\n" inside double quotes would be stored and
    # later printed as the two characters backslash + n.
    warn "exception variable missing from log message:"$'\n'" $found"
  fi
  return 0
}

check_ruff() {
  # Usage: check_ruff <dir> [<select>]
  # Runs ruff if available; warns on F401 (unused imports) and E302/E303 (blank lines).
  # <select> is passed to `ruff check --select` (default: F401,E302,E303).
  # Records a warning with the number of findings, or a pass when ruff reports
  # a clean run. Records nothing when ruff is not installed or could not be
  # run. Always returns 0.
  local dir="${1:-.}"
  local select="${2:-F401,E302,E303}"

  # Run whichever ruff was actually detected. Invoking `python3 -m ruff` when
  # only the standalone binary exists fails, and a failed run must not be
  # mistaken for "no issues".
  local -a runner=()
  if command -v ruff > /dev/null 2>&1; then
    runner=(ruff)
  elif python3 -m ruff --version > /dev/null 2>&1; then
    runner=(python3 -m ruff)
  else
    return 0
  fi

  # Exit status decides the outcome: 0 = clean, 1 = violations, anything else
  # means ruff itself did not run properly.
  # NOTE(review): newer ruff releases appear to reject --output-format=text,
  # so `concise` is requested instead — confirm against the pinned version.
  # NOTE(review): E302/E303 may require ruff's preview mode — TODO confirm.
  local out=""
  local rc=0
  out=$("${runner[@]}" check --select="$select" --output-format=concise "$dir" 2> /dev/null) || rc=$?
  if (( rc >= 2 )); then
    # A release that does not know `concise` exits abnormally; retry once
    # with ruff's default output format.
    rc=0
    out=$("${runner[@]}" check --select="$select" "$dir" 2> /dev/null) || rc=$?
  fi

  if (( rc == 0 )); then
    # Name the rules that were actually selected.
    pass "$dir: no ruff style issues (${select//,//})"
  elif (( rc == 1 )); then
    local count
    # Findings are counted as output lines that carry a "<file>.py:" location.
    count=$(grep -c "\.py:" <<< "$out" || true)
    warn "$dir: ruff found $count style issue(s) (unused imports / missing blank lines) — run 'ruff check $dir' to see details"
  fi
  # Any other status: ruff could not be run; leave the result list untouched.
  return 0
}
86 changes: 68 additions & 18 deletions .hyf/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
cd "$REPO_ROOT"

# shellcheck source=.hyf/grader_lib.sh
source "$SCRIPT_DIR/grader_lib.sh"

PASSING=60

# --- Task 1: Cleaner Pipeline (60 points) ---
Expand Down Expand Up @@ -186,40 +189,87 @@ fi
# Screenshot is required (10 pts); blob_url.txt with a valid Azure Storage
# URL earns the remaining 10 pts. Both checks live inside the screenshot
# branch — no screenshot means 0/20 regardless of blob_url.txt.
# Full credit for .png (the required format). Partial credit + warning for
# .jpg/.jpeg (wrong extension — matches c55 review pattern).
task3=0
task3_msg="missing task-3/assets/azure_blob_week2.png (or .jpg/.jpeg)"
screenshot_found=false
for ext in png jpg jpeg; do
if [ -s "task-3/assets/azure_blob_week2.$ext" ]; then
task3=10
screenshot_found=true
if [ "$ext" != "png" ]; then
warn "Task 3: screenshot is .$ext but the assignment requires .png — rename to azure_blob_week2.png"
task3=7 # partial credit for wrong format
task3_msg="azure_blob_week2.$ext present but wrong format — rename to .png"
else
task3=10
task3_msg="azure_blob_week2.png present"
fi
if [ -s "task-3/assets/blob_url.txt" ]; then
# Require at least <container>/<blob> after the host so a bare
# storage-account root URL doesn't satisfy the check.
if grep -qE "https://[a-z0-9]+\.blob\.core\.windows\.net/[^/]+/[^/]+" task-3/assets/blob_url.txt; then
task3=20
task3_msg="screenshot and blob URL both present"
task3=$((task3 + 10))
task3_msg="${task3_msg}; blob_url.txt valid"
else
task3_msg="blob_url.txt present but URL format is wrong — expected https://<account>.blob.core.windows.net/<container>/<blob>"
task3_msg="${task3_msg}; blob_url.txt present but URL format is wrong — expected https://<account>.blob.core.windows.net/<container>/<blob>"
fi
else
task3_msg="screenshot present but task-3/assets/blob_url.txt is missing"
task3_msg="${task3_msg}; task-3/assets/blob_url.txt is missing"
fi
break
fi
done

score=$((task1 + task2 + task3))
if [ "$score" -ge "$PASSING" ]; then pass=true; else pass=false; fi

cat > "$SCRIPT_DIR/score.json" <<EOF
{
"score": $score,
"pass": $pass,
"passingScore": $PASSING
}
EOF

echo "Task 1 (Cleaner Pipeline): $task1/60 — $task1_msg"
echo "Task 2 (AI Debug Report): $task2/20 — $task2_msg"

# ── Code hygiene warnings (0 pts — informational only) ──────────────────────
# These checks mirror recurring review comments from cohort c55. They do not
# affect the score but surface issues the teacher would otherwise flag manually.
echo ""
echo "--- Code Hygiene (warnings only, do not affect score) ---"

# print() should be replaced with logging.* — chapter mandates logging.
check_no_print_statements "task-1/src" "task-1/src"

# NotImplementedError stubs left in after implementation.
# Reviewer comment: "please remove the NotImplementedError from your helpers
# since you've already added the functionality".
check_no_notimplemented "task-1/src" "task-1/src"

# Silent 0-assignment in except blocks corrupts data silently.
# Reviewer comment: "sets price/revenue/vat to 0 instead of skipping the row,
# a bad price would silently corrupt the output rather than being dropped".
check_silent_zero_in_except "task-1/src/transforms.py"

# Exception variable not included in log message loses error context.
# Reviewer comment: "I would log the error type for easier debug!"
check_exception_logged "task-1/src"

# Unused imports and missing blank lines — caught by ruff F401/E302.
# Reviewer comments: "This import isn't used anywhere" and
# "need to add new line after the end of each function".
check_ruff "task-1/src" "F401,E302,E303"

# AI_DEBUG.md should include the full traceback.
# Reviewer comment: "Would be good if the full traceback error was pasted here".
if [ -s task-2/AI_DEBUG.md ]; then
if ! grep -q "Traceback" task-2/AI_DEBUG.md; then
warn "AI_DEBUG.md: no 'Traceback' found — paste the full error traceback in the '## The Error' section"
fi
fi

# .gitignore should exclude Python cache files.
check_gitignore_python ".gitignore"

print_results "Week 2 Autograder"

# ── Final score ──────────────────────────────────────────────────────────────
write_score "$score" "$PASSING" "$SCRIPT_DIR/score.json"

echo ""
echo "Task 1 (Cleaner Pipeline): $task1/60 — $task1_msg"
echo "Task 2 (AI Debug Report): $task2/20 — $task2_msg"
echo "Task 3 (Azure Blob Upload): $task3/20 — $task3_msg"
echo "----------------------------------------"
echo "Total: $score/100 — pass=$pass (passing threshold: $PASSING)"
echo "Total: $score/100 — (passing threshold: $PASSING)"