Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 24 additions & 1 deletion src/FSharp.Stats/Testing/ChiSquareTest.fs
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,31 @@ type ChiSquareTest =

TestStatistics.createChiSquare chi2 (float degreesOfFreedom)

/// <summary>
/// Pearson χ² test of independence for an r×c contingency table.
/// </summary>
/// <remarks>
/// For each cell (i,j) the expected count is E = rowTotal(i) × colTotal(j) / N,
/// where N is the grand total of the table.
/// The test statistic is χ² = Σ (O − E)² / E and has (r−1)(c−1) degrees of freedom.
/// Cells with expected count zero are skipped (they contribute 0 to χ²).
/// </remarks>
/// <param name="table">The r×c contingency table holding the observed counts.</param>
/// <returns>
/// The result of TestStatistics.createChiSquare applied to the χ² statistic
/// and the degrees of freedom (r−1)(c−1).
/// </returns>
/// <exception cref="System.ArgumentException">Thrown when the grand total of the table is zero.</exception>
static member pearsonChiSquared (table:ContingencyTable<_,_>) =
    let numRows = table.NumRows
    let numCols = table.NumCols
    // Read the key arrays once instead of on every cell access.
    let rowKeys = table.RowKeys
    let colKeys = table.ColKeys
    let grandTotal = float (Contingency.total table)
    // Every expected count divides by the grand total, so an empty table is rejected up front.
    if grandTotal = 0.0 then invalidArg "table" "ContingencyTable is empty (grand total is zero)"
    let rowTotals = rowKeys |> Array.map (fun r -> float (Contingency.rowTotal r table))
    let colTotals = colKeys |> Array.map (fun c -> float (Contingency.columnTotal c table))
    let chi2 =
        [| for i in 0..numRows-1 do
            for j in 0..numCols-1 do
                let observed = float (Contingency.getCount rowKeys.[i] colKeys.[j] table)
                let expected = rowTotals.[i] * colTotals.[j] / grandTotal
                // A zero expected count would divide by zero; such cells contribute nothing.
                if expected > 0.0 then
                    let deviation = observed - expected
                    yield deviation * deviation / expected |]
        |> Array.sum
    let degreesOfFreedom = (numRows - 1) * (numCols - 1)
    TestStatistics.createChiSquare chi2 (float degreesOfFreedom)

static member pearsonChiSquared (table:Contingency2x2<_,_>) =
let apply o e =
Expand Down
31 changes: 30 additions & 1 deletion tests/FSharp.Stats.Tests/Testing.fs
Original file line number Diff line number Diff line change
Expand Up @@ -384,13 +384,42 @@ let chiSquaredTests =
let df = expected.Length - 1
ChiSquareTest.compute df expected observed

// Fixture: 2×3 contingency table for the Pearson chi-squared test of independence.
// Reference values from R: chisq.test(matrix(c(8,12,15,5,10,15), nrow=2), correct=FALSE)
//   chi-squared = 6.7862, df = 2, p-value = 0.03360
// The counts below are the rows of that R matrix written one after another
// (row A = 8, 15, 10; row B = 12, 5, 15) — presumably the layout Contingency.create expects.
let contingency2x3 =
    let rowLabels = [| "A"; "B" |]
    let columnLabels = [| "O1"; "O2"; "O3" |]
    let observedCounts = [| 8; 15; 10; 12; 5; 15 |]
    Contingency.create rowLabels columnLabels observedCounts

// Fixture: 2×2 contingency table, built as a general table so the test exercises the r×c code path.
// Reference values from R: chisq.test(matrix(c(8,12,15,5), nrow=2), correct=FALSE)
//   chi-squared = 5.0128, df = 1, p-value = 0.02516
// The counts below are the rows of that R matrix written one after another
// (row A = 8, 15; row B = 12, 5) — presumably the layout Contingency.create expects.
let contingency2x2general =
    let rowLabels = [| "A"; "B" |]
    let columnLabels = [| "O1"; "O2" |]
    let observedCounts = [| 8; 15; 12; 5 |]
    Contingency.create rowLabels columnLabels observedCounts

testList "Testing.ChiSquaredTest" [
    // Goodness-of-fit results computed by ChiSquareTest.compute (fixtures defined above this list).
    testCase "compute" (fun () ->
        Expect.isTrue (0.9254 = Math.Round(testCase1.PValueRight,4)) "pValue should be equal."
        Expect.isTrue (0.4700 = Math.Round(testCase1.Statistic,4)) "statistic should be equal."
        Expect.isTrue (0.000638 = Math.Round(testCase2.PValueRight,6)) "pValue should be equal."
        Expect.isTrue (19.461 = Math.Round(testCase2.Statistic,3)) "statistic should be equal.")

    // Test of independence on the 2×3 table: statistic, degrees of freedom and p-value
    // are each checked in their own test case against the R reference values.
    testCase "pearsonChiSquared 2x3 statistic" (fun () ->
        let outcome = ChiSquareTest.pearsonChiSquared contingency2x3
        Expect.floatClose Accuracy.medium outcome.Statistic 6.7862 "chi2 statistic should match R")
    testCase "pearsonChiSquared 2x3 degrees of freedom" (fun () ->
        let outcome = ChiSquareTest.pearsonChiSquared contingency2x3
        Expect.floatClose Accuracy.high outcome.DegreesOfFreedom 2.0 "df should be (2-1)*(3-1)=2")
    testCase "pearsonChiSquared 2x3 p-value" (fun () ->
        let outcome = ChiSquareTest.pearsonChiSquared contingency2x3
        Expect.isTrue (0.0336 = Math.Round(outcome.PValueRight, 4)) "p-value should match R to 4 decimal places")

    // Same test on a 2×2 table passed in as a general contingency table.
    testCase "pearsonChiSquared 2x2 general path" (fun () ->
        let outcome = ChiSquareTest.pearsonChiSquared contingency2x2general
        Expect.floatClose Accuracy.medium outcome.Statistic 5.0128 "chi2 statistic should match R for 2x2")
]

[<Tests>]
Expand Down
Loading