diff --git a/setup.py b/setup.py index 995ef2f..cb023d8 100644 --- a/setup.py +++ b/setup.py @@ -28,7 +28,7 @@ def readme(): setup( name='statscounter', - version='0.0.010', + version='0.0.011', url='https://github.com/datalib/statscounter', license='MIT', description="Python's missing statistical Swiss Army knife", diff --git a/statscounter/statscounter.py b/statscounter/statscounter.py index 2d4501c..3a2e16c 100644 --- a/statscounter/statscounter.py +++ b/statscounter/statscounter.py @@ -25,60 +25,104 @@ import statscounter.stats as stats +NUMBER_TYPES = set(['float', 'int', 'Decimal', 'Fraction']) + + +class MultipleMostCommonValuesError(ValueError): + """""" + pass + + class StatsCounter(Counter): + + def key_types_distribution(self): + """Return a p. distribution of the elements' types""" + return StatsCounter([type(element).__name__ + for element in self.elements()]).normalize() + def mean(self): + """ AKA Expectation + """ + try: + return stats.mean(self.elements()) + except (TypeError): + raise TypeError("Distribution is not a numerical type.") + + def expectation(self): """ """ - return stats.mean(self.values()) + return self.mean() def median(self, ): """ """ - return stats.median(self.values()) - + key_type = self.key_types_distribution().most_common(1)[0] + print(key_type) + if key_type[0] not in NUMBER_TYPES or key_type[1] != 1.0: + raise TypeError("Distribution is not a numerical type.") + else: + return stats.median(self.elements()) + def median_low(self): """ """ - return stats.median_low(self.values()) + key_type = self.key_types_distribution().most_common(1)[0] + if key_type[0] not in NUMBER_TYPES or key_type[1] != 1.0: + raise TypeError("Distribution is not a numerical type.") + else: + return stats.median_low(self.elements()) def median_high(self): """ """ - return stats.median_high(self.values()) - + key_type = self.key_types_distribution().most_common(1)[0] + if key_type[0] not in NUMBER_TYPES or key_type[1] != 1.0: + raise TypeError("Distribution is not a numerical type.") + else: + return stats.median_high(self.elements()) + def median_grouped(self): """ """ - return stats.median_grouped(self.values()) - + key_type = self.key_types_distribution().most_common(1)[0] + if key_type[0] not in NUMBER_TYPES or key_type[1] != 1.0: + raise TypeError("Distribution is not a numerical type.") + else: + return stats.median_grouped(self.elements()) + def mode(self): """ """ - return stats.mode(self.values()) + return stats.mode(self.elements()) def variance(self): """ """ - return stats.variance(self.values()) + return stats.variance(self.elements()) def pvariance(self): """ """ - return stats.pvariance(self.values()) + return stats.pvariance(self.elements()) def stdev(self, ): """ """ - return stats.stdev(self.values()) + return stats.stdev(self.elements()) def pstdev(self): """ """ - return stats.pstdev(self.values()) + return stats.pstdev(self.elements()) def best_pair(self): - return self.most_common(1)[0] - + try: + self.mode() + except (stats.StatisticsError): + raise MultipleMostCommonValuesError("Two or more values appear more than once.") + else: + return self.most_common(1)[0] + def argmax(self): """ """ diff --git a/tests/test_statscounter.py b/tests/test_statscounter.py index fa551cd..dd028e7 100644 --- a/tests/test_statscounter.py +++ b/tests/test_statscounter.py @@ -1,56 +1,103 @@ from __future__ import division from pytest import raises from statscounter import StatsCounter, stats - +from statscounter.statscounter import MultipleMostCommonValuesError class TestStatsCounter: - counter_ints = StatsCounter({str(s):s for s in range(1000)}) - - def test_mean_int(self): - m = self.counter_ints.mean() - d = 499500/1000 + counter_ints = StatsCounter([1,1,2,3,4]) + counter_ints_with_two_modes = StatsCounter([1,1,2,3,4,4]) + counter_chars = StatsCounter('aabccd') + + def test_key_types_distribution(self): + ci = self.counter_ints.key_types_distribution() + ci2 = self.counter_ints_with_two_modes.key_types_distribution() + cc = self.counter_chars.key_types_distribution() + + assert ci == StatsCounter(['int']) + assert ci2 == StatsCounter(['int']) + assert cc == StatsCounter(['str']) + + def test_mean(self): + m = self.counter_ints_with_two_modes.mean() + d = 15/6 assert m == d - + + def test_mean_throws_exception(self): + with raises(TypeError): + self.counter_chars.mean() + + def test_median(self): + m = self.counter_ints_with_two_modes.median() + assert m == 2.5 + + def test_median_throws_exception(self): + with raises(TypeError): + self.counter_chars.median() + def test_median_low(self): - m = self.counter_ints.median_low() - assert m == 499 - + m = self.counter_ints_with_two_modes.median_low() + assert m == 2 + + def test_median_low_throws_exception(self): + with raises(TypeError): + self.counter_chars.median_low() + def test_median_high(self, ): - m = self.counter_ints.median_high() - assert m == 500 + m = self.counter_ints_with_two_modes.median_high() + assert m == 3 + + def test_median_high_throws_exception(self): + with raises(TypeError): + self.counter_chars.median_high() def test_median_grouped(self, ): - m = self.counter_ints.median_grouped() - assert m == 499.5 - - def test_mode(self): + m = self.counter_ints_with_two_modes.median_grouped() + assert m == 2.5 + + def test_median_grouped_throws_exception(self): + with raises(TypeError): + self.counter_chars.median_grouped() + + def test_mode(self, ): + m = self.counter_ints.mode() + assert m == 1 + + def test_mode_throws_exception(self): with raises(stats.StatisticsError): - self.counter_ints.mode() + self.counter_ints_with_two_modes.mode() def test_variance(self): - m = self.counter_ints.variance() - assert m == 83416.66666666667 + m = self.counter_ints_with_two_modes.variance() + assert m == 1.9 def test_stdev(self, ): - m = self.counter_ints.stdev() - assert m == 288.8194360957494 + m = self.counter_ints_with_two_modes.stdev() + assert m == 1.378404875209022 def test_pvariance(self): - m = self.counter_ints.pvariance() - assert m == 83333.25 + m = self.counter_ints_with_two_modes.pvariance() + assert m == 1.5833333333333333 def test_pstdev(self, ): - m = self.counter_ints.pstdev() - assert m == 288.6749902572095 - + m = self.counter_ints_with_two_modes.pstdev() + assert m == 1.2583057392117916 + def test_argmax(self): m = self.counter_ints.argmax() - assert m == '999' + assert m == 1 + + def test_argmax_throws_exception(self): + with raises(MultipleMostCommonValuesError): + m = self.counter_ints_with_two_modes.argmax() def test_max(self): m = self.counter_ints.max() - assert m == 999 + assert m == 2 + def test_max_throws_exception(self): + with raises(MultipleMostCommonValuesError): + m = self.counter_ints_with_two_modes.max() + def test_normalize(self): pdist = StatsCounter({1: 1, 2: 2, 3: 1}).normalize() assert pdist == {