From e84e18f86a8d439b3d21ff67a2d81438ac1e8cfc Mon Sep 17 00:00:00 2001
From: im-rodrigo <rodrigopala91@gmail.com>
Date: Tue, 21 Jul 2015 23:28:28 -0400
Subject: [PATCH 1/6] added new error for cases when attempting to find mean of
 non-numerical values; the stats fns are now executed against 'elements' and
 not 'values'

---
 statscounter/statscounter.py | 37 ++++++++++++++++++++++++++----------
 1 file changed, 27 insertions(+), 10 deletions(-)

diff --git a/statscounter/statscounter.py b/statscounter/statscounter.py
index 2d4501c..c1e22d1 100644
--- a/statscounter/statscounter.py
+++ b/statscounter/statscounter.py
@@ -25,56 +25,73 @@
 import statscounter.stats as stats
 
 
+class WrongVariableTypeError(ValueError):
+	"""You cannot find the 'expected value' (mean) of a distribution
+	of categorical (nominal) random variables (for example, a 
+	distribution of words is equivalent to a categorical variable).
+	It makes no sense to find the average word.
+	"""
+	pass
+
+
 class StatsCounter(Counter):
 	def mean(self):
+		""" AKA Expectation
+		"""
+		try:
+			return stats.mean(self.elements())
+		except (TypeError):
+			raise WrongVariableTypeError("Distribution is not a numerical type.")
+		
+	def expectation(self):
 		"""
 		"""
-		return stats.mean(self.values())
+		return self.mean()
 
 	def median(self, ):
 		"""
 		"""
-		return stats.median(self.values())
+		return stats.median(self.elements())
 
 	def median_low(self):
 		"""
 		"""
-		return stats.median_low(self.values())
+		return stats.median_low(self.elements())
 
 	def median_high(self):
 		"""
 		"""
-		return stats.median_high(self.values())
+		return stats.median_high(self.elements())
 
 	def median_grouped(self):
 		"""
 		"""
-		return stats.median_grouped(self.values())
+		return stats.median_grouped(self.elements())
 
 	def mode(self):
 		"""
 		"""
-		return stats.mode(self.values())
+		return stats.mode(self.elements())
 
 	def variance(self):
 		"""
 		"""
-		return stats.variance(self.values())
+		return stats.variance(self.elements())
 
 	def pvariance(self):
 		"""
 		"""
-		return stats.pvariance(self.values())
+		return stats.pvariance(self.elements())
 
 	def stdev(self, ):
 		"""
 		"""
-		return stats.stdev(self.values())
+		return stats.stdev(self.elements())
 
 	def pstdev(self):
 		"""
 		"""
-		return stats.pstdev(self.values())
+		return stats.pstdev(self.elements())
 
 	def best_pair(self):
 		return self.most_common(1)[0]

From 7ad446d14de6712b9a55b9282272887d256bf215 Mon Sep 17 00:00:00 2001
From: im-rodrigo <rodrigopala91@gmail.com>
Date: Tue, 21 Jul 2015 23:28:46 -0400
Subject: [PATCH 2/6] bumped version number

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 995ef2f..cb023d8 100644
--- a/setup.py
+++ b/setup.py
@@ -28,7 +28,7 @@ def readme():
 
 setup(
     name='statscounter',
-    version='0.0.010',
+    version='0.0.011',
     url='https://github.com/datalib/statscounter',
     license='MIT',
     description="Python's missing statistical Swiss Army knife",

From 30bbd905114f98f144deacd0865eea845517c8c0 Mon Sep 17 00:00:00 2001
From: im-rodrigo <rodrigopala91@gmail.com>
Date: Mon, 10 Aug 2015 20:26:58 -0700
Subject: [PATCH 3/6] adding errors for median fn's for when dealing with
 non-numeric types

---
 statscounter/statscounter.py | 26 ++++++++++++++++++++------
 1 file changed, 20 insertions(+), 6 deletions(-)

diff --git a/statscounter/statscounter.py b/statscounter/statscounter.py
index c1e22d1..5e0055d 100644
--- a/statscounter/statscounter.py
+++ b/statscounter/statscounter.py
@@ -51,23 +51,37 @@ def expectation(self):
 	def median(self, ):
 		"""
 		"""
-		return stats.median(self.elements())
-
+		try:
+			return stats.median(self.elements())
+		except (TypeError):
+			raise WrongVariableTypeError("Distribution is not a numerical type.")
+		
 	def median_low(self):
 		"""
 		"""
-		return stats.median_low(self.elements())
+		try:
+			return stats.median_low(self.elements())
+		except (TypeError):
+			raise WrongVariableTypeError("Distribution is not a numerical type.")
+		
 
 	def median_high(self):
 		"""
 		"""
-		return stats.median_high(self.elements())
+		try:
+			return stats.median_high(self.elements())
+		except (TypeError):
+			raise WrongVariableTypeError("Distribution is not a numerical type.")
+		
 
 	def median_grouped(self):
 		"""
 		"""
-		return stats.median_grouped(self.elements())
-
+		try:
+			return stats.median_grouped(self.elements())
+		except (TypeError):
+			raise WrongVariableTypeError("Distribution is not a numerical type.")
+		
 	def mode(self):
 		"""
 		"""

From 5dd81b2b75e4b898d1920764b2ea4f160ee51366 Mon Sep 17 00:00:00 2001
From: im-rodrigo <rodrigopala91@gmail.com>
Date: Mon, 24 Aug 2015 20:39:56 -0700
Subject: [PATCH 4/6] adding MultipleMostCommonValuesError; max, argmax and
 best_pair should only return when a single value is most common

---
 statscounter/statscounter.py | 10 +++++++++-
 tests/test_statscounter.py   | 30 +++++++++++++++---------------
 2 files changed, 24 insertions(+), 16 deletions(-)

diff --git a/statscounter/statscounter.py b/statscounter/statscounter.py
index 5e0055d..a07a71b 100644
--- a/statscounter/statscounter.py
+++ b/statscounter/statscounter.py
@@ -33,6 +33,10 @@ class WrongVariableTypeError(ValueError):
 	"""
 	pass
 
+class MultipleMostCommonValuesError(ValueError):
+	""""""
+	pass
+
 
 class StatsCounter(Counter):
 	def mean(self):
@@ -108,7 +112,11 @@ def pstdev(self):
 		return stats.pstdev(self.elements())
 
 	def best_pair(self):
-		return self.most_common(1)[0]
+		best_two_pairs = self.most_common(2)[0]
+		try:
+			self.mode()
+		except (stats.StatisticsError):
+			raise MultipleMostCommonValuesError("Two or more values appear more than once.")
 
 	def argmax(self):
 		"""
diff --git a/tests/test_statscounter.py b/tests/test_statscounter.py
index fa551cd..af81557 100644
--- a/tests/test_statscounter.py
+++ b/tests/test_statscounter.py
@@ -1,27 +1,27 @@
 from __future__ import division
 from pytest import raises
 from statscounter import StatsCounter, stats
-
+from statscounter.statscounter import MultipleMostCommonValuesError
 
 class TestStatsCounter:
-	counter_ints = StatsCounter({str(s):s for s in range(1000)})
+	counter_ints = StatsCounter([1,1,2,3,4,4])
 
 	def test_mean_int(self):
 		m = self.counter_ints.mean()
-		d = 499500/1000
+		d = 15/6
 		assert m == d
 
 	def test_median_low(self):
 		m = self.counter_ints.median_low()
-		assert m == 499
+		assert m == 2
 
 	def test_median_high(self, ):
 		m = self.counter_ints.median_high()
-		assert m == 500
+		assert m == 3
 
 	def test_median_grouped(self, ):
 		m = self.counter_ints.median_grouped()
-		assert m == 499.5
+		assert m == 2.5
 
 	def test_mode(self):
 		with raises(stats.StatisticsError):
@@ -29,28 +29,28 @@ def test_mode(self):
 
 	def test_variance(self):
 		m = self.counter_ints.variance()
-		assert m == 83416.66666666667
+		assert m == 1.9
 
 	def test_stdev(self, ):
 		m = self.counter_ints.stdev()
-		assert m == 288.8194360957494
+		assert m == 1.378404875209022
 
 	def test_pvariance(self):
 		m = self.counter_ints.pvariance()
-		assert m == 83333.25
+		assert m == 1.5833333333333333
 
 	def test_pstdev(self, ):
 		m = self.counter_ints.pstdev()
-		assert m == 288.6749902572095
+		assert m == 1.2583057392117916
 
 	def test_argmax(self):
-		m = self.counter_ints.argmax()
-		assert m == '999'
+		with raises(MultipleMostCommonValuesError):
+			m = self.counter_ints.argmax()
 
 	def test_max(self):
-		m = self.counter_ints.max()
-		assert m == 999
-
+		with raises(MultipleMostCommonValuesError):
+			m = self.counter_ints.max()
+		
 	def test_normalize(self):
 		pdist = StatsCounter({1: 1, 2: 2, 3: 1}).normalize()
 		assert pdist == {

From e4b1d8c5e7d55e73d035f901357c121fd2db5164 Mon Sep 17 00:00:00 2001
From: im-rodrigo <rodrigopala91@gmail.com>
Date: Mon, 24 Aug 2015 21:15:09 -0700
Subject: [PATCH 5/6] formatted test functions to match conventions regarding
 exception testing

---
 statscounter/statscounter.py |  7 ++++---
 tests/test_statscounter.py   | 35 ++++++++++++++++++++++-------------
 2 files changed, 26 insertions(+), 16 deletions(-)

diff --git a/statscounter/statscounter.py b/statscounter/statscounter.py
index a07a71b..6154a62 100644
--- a/statscounter/statscounter.py
+++ b/statscounter/statscounter.py
@@ -26,7 +26,7 @@
 
 
 class WrongVariableTypeError(ValueError):
-	"""You cannot find the 'expected value' (mean) of a distribution
+	"""You cannot find the 'expectation' (mean) of a distribution
 	of categorical (nominal) random variables (for example, a 
 	distribution of words is equivalent to a categorical variable).
 	It makes no sense to find the average word.
@@ -112,12 +112,13 @@ def pstdev(self):
 		return stats.pstdev(self.elements())
 
 	def best_pair(self):
-		best_two_pairs = self.most_common(2)[0]
 		try:
 			self.mode()
 		except (stats.StatisticsError):
 			raise MultipleMostCommonValuesError("Two or more values appear more than once.")
-
+		else:
+			return self.most_common(1)[0]
+		
 	def argmax(self):
 		"""
 		"""
diff --git a/tests/test_statscounter.py b/tests/test_statscounter.py
index af81557..bddd908 100644
--- a/tests/test_statscounter.py
+++ b/tests/test_statscounter.py
@@ -4,52 +4,61 @@
 from statscounter.statscounter import MultipleMostCommonValuesError
 
 class TestStatsCounter:
-	counter_ints = StatsCounter([1,1,2,3,4,4])
+	counter_ints = StatsCounter([1,1,2,3,4])
+	counter_ints_with_two_modes = StatsCounter([1,1,2,3,4,4])
 
 	def test_mean_int(self):
-		m = self.counter_ints.mean()
+		m = self.counter_ints_with_two_modes.mean()
 		d = 15/6
 		assert m == d
 
 	def test_median_low(self):
-		m = self.counter_ints.median_low()
+		m = self.counter_ints_with_two_modes.median_low()
 		assert m == 2
 
 	def test_median_high(self, ):
-		m = self.counter_ints.median_high()
+		m = self.counter_ints_with_two_modes.median_high()
 		assert m == 3
 
 	def test_median_grouped(self, ):
-		m = self.counter_ints.median_grouped()
+		m = self.counter_ints_with_two_modes.median_grouped()
 		assert m == 2.5
 
 	def test_mode(self):
 		with raises(stats.StatisticsError):
-			self.counter_ints.mode()
+			self.counter_ints_with_two_modes.mode()
 
 	def test_variance(self):
-		m = self.counter_ints.variance()
+		m = self.counter_ints_with_two_modes.variance()
 		assert m == 1.9
 
 	def test_stdev(self, ):
-		m = self.counter_ints.stdev()
+		m = self.counter_ints_with_two_modes.stdev()
 		assert m == 1.378404875209022
 
 	def test_pvariance(self):
-		m = self.counter_ints.pvariance()
+		m = self.counter_ints_with_two_modes.pvariance()
 		assert m == 1.5833333333333333
 
 	def test_pstdev(self, ):
-		m = self.counter_ints.pstdev()
+		m = self.counter_ints_with_two_modes.pstdev()
 		assert m == 1.2583057392117916
-
+	
 	def test_argmax(self):
+		m = self.counter_ints.argmax()
+		assert m == 1
+		
+	def test_argmax_throws_exception(self):
 		with raises(MultipleMostCommonValuesError):
-			m = self.counter_ints.argmax()
+			m = self.counter_ints_with_two_modes.argmax()
 
 	def test_max(self):
+		m = self.counter_ints.max()
+		assert m == 2
+
+	def test_max_throws_exception(self):
 		with raises(MultipleMostCommonValuesError):
-			m = self.counter_ints.max()
+			m = self.counter_ints_with_two_modes.max()
 		
 	def test_normalize(self):
 		pdist = StatsCounter({1: 1, 2: 2, 3: 1}).normalize()

From 7b9bc61ea57e15fb8d770bc3b6e7f63f0122babc Mon Sep 17 00:00:00 2001
From: im-rodrigo <rodrigopala91@gmail.com>
Date: Tue, 25 Aug 2015 00:07:59 -0700
Subject: [PATCH 6/6] added key_types_distribution, which creates a prob. dist.
 of types of the elements of the referencing class; used for checking if dist
 is 'discrete random variable'

---
 statscounter/statscounter.py | 48 ++++++++++++++++++---------------
 tests/test_statscounter.py   | 52 +++++++++++++++++++++++++++++++-----
 2 files changed, 71 insertions(+), 29 deletions(-)

diff --git a/statscounter/statscounter.py b/statscounter/statscounter.py
index 6154a62..3a2e16c 100644
--- a/statscounter/statscounter.py
+++ b/statscounter/statscounter.py
@@ -25,13 +25,8 @@
 import statscounter.stats as stats
 
 
-class WrongVariableTypeError(ValueError):
-	"""You cannot find the 'expectation' (mean) of a distribution
-	of categorical (nominal) random variables (for example, a 
-	distribution of words is equivalent to a categorical variable).
-	It makes no sense to find the average word.
-	"""
-	pass
+NUMBER_TYPES = set(['float', 'int', 'Decimal', 'Fraction'])
+
 
 class MultipleMostCommonValuesError(ValueError):
 	""""""
@@ -39,13 +34,19 @@ class MultipleMostCommonValuesError(ValueError):
 
 
 class StatsCounter(Counter):
+	
+	def key_types_distribution(self):
+		"""Return a p. distribution of the elements' types"""
+		return StatsCounter([type(element).__name__ 
+							 for element in self.elements()]).normalize()
+		
 	def mean(self):
 		""" AKA Expectation
 		"""
 		try:
 			return stats.mean(self.elements())
 		except (TypeError):
-			raise WrongVariableTypeError("Distribution is not a numerical type.")
+			raise TypeError("Distribution is not a numerical type.")
 		
 	def expectation(self):
 		"""
@@ -55,36 +56,39 @@ def expectation(self):
 	def median(self, ):
 		"""
 		"""
-		try:
+		key_type = self.key_types_distribution().most_common(1)[0]
+		print(key_type)
+		if key_type[0] not in NUMBER_TYPES or key_type[1] != 1.0:
+			raise TypeError("Distribution is not a numerical type.")
+		else:
 			return stats.median(self.elements())
-		except (TypeError):
-			raise WrongVariableTypeError("Distribution is not a numerical type.")
 		
 	def median_low(self):
 		"""
 		"""
-		try:
+		key_type = self.key_types_distribution().most_common(1)[0]
+		if key_type[0] not in NUMBER_TYPES or key_type[1] != 1.0:
+			raise TypeError("Distribution is not a numerical type.")
+		else:
 			return stats.median_low(self.elements())
-		except (TypeError):
-			raise WrongVariableTypeError("Distribution is not a numerical type.")
-		
 
 	def median_high(self):
 		"""
 		"""
-		try:
+		key_type = self.key_types_distribution().most_common(1)[0]
+		if key_type[0] not in NUMBER_TYPES or key_type[1] != 1.0:
+			raise TypeError("Distribution is not a numerical type.")
+		else:
 			return stats.median_high(self.elements())
-		except (TypeError):
-			raise WrongVariableTypeError("Distribution is not a numerical type.")
 		
-
 	def median_grouped(self):
 		"""
 		"""
-		try:
+		key_type = self.key_types_distribution().most_common(1)[0]
+		if key_type[0] not in NUMBER_TYPES or key_type[1] != 1.0:
+			raise TypeError("Distribution is not a numerical type.")
+		else:	
 			return stats.median_grouped(self.elements())
-		except (TypeError):
-			raise WrongVariableTypeError("Distribution is not a numerical type.")
 		
 	def mode(self):
 		"""
diff --git a/tests/test_statscounter.py b/tests/test_statscounter.py
index bddd908..dd028e7 100644
--- a/tests/test_statscounter.py
+++ b/tests/test_statscounter.py
@@ -6,25 +6,63 @@
 class TestStatsCounter:
 	counter_ints = StatsCounter([1,1,2,3,4])
 	counter_ints_with_two_modes = StatsCounter([1,1,2,3,4,4])
-
-	def test_mean_int(self):
+	counter_chars = StatsCounter('aabccd')
+	
+	def test_key_types_distribution(self):
+		ci = self.counter_ints.key_types_distribution()
+		ci2 = self.counter_ints_with_two_modes.key_types_distribution()
+		cc = self.counter_chars.key_types_distribution()
+		
+		assert ci == StatsCounter(['int'])
+		assert ci2 == StatsCounter(['int'])
+		assert cc == StatsCounter(['str']) 
+		
+	def test_mean(self):
 		m = self.counter_ints_with_two_modes.mean()
 		d = 15/6
 		assert m == d
-
+	
+	def test_mean_throws_exception(self):
+		with raises(TypeError):
+			self.counter_chars.mean()
+	
+	def test_median(self):
+		m = self.counter_ints_with_two_modes.median()
+		assert m == 2.5
+		
+	def test_median_throws_exception(self):
+		with raises(TypeError):
+			self.counter_chars.median()
+		
 	def test_median_low(self):
 		m = self.counter_ints_with_two_modes.median_low()
-		assert m == 2
-
+		assert m == 2		
+	
+	def test_median_low_throws_exception(self):
+		with raises(TypeError):
+			self.counter_chars.median_low()
+	
 	def test_median_high(self, ):
 		m = self.counter_ints_with_two_modes.median_high()
 		assert m == 3
+		
+	def test_median_high_throws_exception(self):
+		with raises(TypeError):
+			self.counter_chars.median_high()
 
 	def test_median_grouped(self, ):
 		m = self.counter_ints_with_two_modes.median_grouped()
 		assert m == 2.5
-
-	def test_mode(self):
+		
+	def test_median_grouped_throws_exception(self):
+		with raises(TypeError):
+			self.counter_chars.median_grouped()	
+	
+	def test_mode(self, ):
+		m = self.counter_ints.mode()
+		assert m == 1
+		
+	def test_mode_throws_exception(self):
 		with raises(stats.StatisticsError):
 			self.counter_ints_with_two_modes.mode()