Summary  
This chapter demonstrates how to perform a paired t-test on two related datasets using ttest_rel, which does not assume equal variances but does require equal sample sizes, and how to visualize the two distributions together with their mean values.  

General domain of usage  
Before-and-after experimental analysis

次の関数は対応のあるt検定を実行します：

```python
ttest_rel(a, b, alternative='two-sided')
```

この手順は独立サンプルの場合と似ていますが、ここでは分散の等質性を確認する必要はありません。対応のあるt検定は、分散が等しいことを**前提としません**。

対応のあるt検定では、各観測値がもう一方のサンプルの観測値と1対1で対応するため、**サンプルサイズが等しいこと**が必須です。

この情報を踏まえて、対応のあるt検定の実施に進みます。

ここでは、特定のアプリのダウンロード数に関するデータがあります。サンプルを確認すると、平均値がほぼ同じであることがわかります。

import pandas as pd
import matplotlib.pyplot as plt

# Locations of the two related samples ("before" and "after" measurements)
before_url = 'https://codefinity-content-media.s3.eu-west-1.amazonaws.com/a849660e-ddfa-4033-80a6-94a1b7772e23/Testing2.0/before.csv'
after_url = 'https://codefinity-content-media.s3.eu-west-1.amazonaws.com/a849660e-ddfa-4033-80a6-94a1b7772e23/Testing2.0/after.csv'

# Load each CSV; squeeze() collapses a one-column frame into a Series
# (assumes each file holds a single column -- TODO confirm)
before = pd.read_csv(before_url).squeeze()
after = pd.read_csv(after_url).squeeze()

# Overlay both histograms; the partial transparency keeps the overlap visible
plt.hist(before, alpha=0.7)
plt.hist(after, alpha=0.7)

# Mark each sample mean with a dashed vertical line
before_mean = before.mean()
after_mean = after.mean()
plt.axvline(before_mean, color='blue', linestyle='dashed')
plt.axvline(after_mean, color='gold', linestyle='dashed')

import unittest
import pandas as pd
import scipy.stats as st

def _dynamic_test(test_case, condition, success_msg, failure_msg):
    """Report ``condition`` on ``test_case`` using a human-readable message.

    The test case's ``_testMethodName`` is overwritten with the chosen
    message before the outcome is recorded (presumably so the message is
    what the test runner displays -- confirm against the runner).

    Args:
        test_case: Object providing ``assertTrue`` and ``fail``.
        condition: Truthy to mark the check as passed, falsy to fail it.
        success_msg: Message used when ``condition`` is truthy.
        failure_msg: Message used when ``condition`` is falsy.
    """
    # Handle the failing case first so the success path reads straight through.
    if not condition:
        test_case._testMethodName = failure_msg
        test_case.fail(failure_msg)
        return

    test_case._testMethodName = success_msg
    test_case.assertTrue(True, success_msg)


class TestPairedTTest(unittest.TestCase):
    """Checks the learner's paired t-test output against a reference run."""

    def test_ttest_rel(self):
        """Compare ``user_code.stats`` and ``user_code.pvalue`` with SciPy.

        The reference is ``ttest_rel(after, before, alternative='greater')``;
        both values must be within 1e-6 of it.
        """
        import user_code

        # Load the same data the learner's solution works with
        before_url = 'https://codefinity-content-media.s3.eu-west-1.amazonaws.com/a849660e-ddfa-4033-80a6-94a1b7772e23/Testing2.0/before.csv'
        after_url = 'https://codefinity-content-media.s3.eu-west-1.amazonaws.com/a849660e-ddfa-4033-80a6-94a1b7772e23/Testing2.0/after.csv'
        before = pd.read_csv(before_url).squeeze()
        after = pd.read_csv(after_url).squeeze()

        # Expected results
        reference = st.ttest_rel(after, before, alternative='greater')
        expected_stats, expected_pvalue = reference

        # The p-value is only inspected once the statistic already matches.
        tolerance = 1e-6
        stats_match = abs(user_code.stats - expected_stats) < tolerance
        condition = (
            stats_match
            and abs(user_code.pvalue - expected_pvalue) < tolerance
        )

        _dynamic_test(
            self,
            condition,
            "The paired t-test is conducted correctly.",
            "The paired t-test is incorrect. Check your arguments or test type."
        )


class TestAlternativeHypothesis(unittest.TestCase):
    """Checks that the learner used the expected alternative and argument order."""

    def test_alternative_parameter(self):
        """Compare ``user_code.pvalue`` with the reference p-value.

        The reference is computed as
        ``ttest_rel(after, before, alternative='greater')``; the learner's
        p-value must be within 1e-6 of it.
        """
        import user_code

        before_url = 'https://codefinity-content-media.s3.eu-west-1.amazonaws.com/a849660e-ddfa-4033-80a6-94a1b7772e23/Testing2.0/before.csv'
        after_url = 'https://codefinity-content-media.s3.eu-west-1.amazonaws.com/a849660e-ddfa-4033-80a6-94a1b7772e23/Testing2.0/after.csv'
        before = pd.read_csv(before_url).squeeze()
        after = pd.read_csv(after_url).squeeze()

        # Repeat the test with the correct parameter
        reference = st.ttest_rel(after, before, alternative='greater')
        expected_pvalue = reference.pvalue

        # Check that the argument order and the alternative are correct
        pvalue_gap = abs(user_code.pvalue - expected_pvalue)
        condition = pvalue_gap < 1e-6

        _dynamic_test(
            self,
            condition,
            "The alternative hypothesis 'greater' and argument order are set correctly.",
            "The test parameters are incorrect. Verify the order of arguments or the 'alternative' parameter."
        )


class TestDecisionLogic(unittest.TestCase):
    """Intended to check the learner's accept/reject decision for the test."""

    def test_hypothesis_logic(self):
        """Derive the expected conclusion message from ``user_code.pvalue``.

        NOTE(review): the final condition only checks that a message was
        selected, which is always the case, so this test cannot fail once
        ``user_code.pvalue`` is comparable to a float. The learner's actual
        output is never inspected here -- confirm what should be compared.
        """
        import user_code

        # Check the decision logic depending on pvalue (0.05 significance level)
        expected_message = (
            "We support the null hypothesis, the mean values are equal"
            if user_code.pvalue > 0.05
            else "We reject the null hypothesis, the mean values are different"
        )

        # Check logical consistency
        condition = expected_message is not None

        _dynamic_test(
            self,
            condition,
            "The hypothesis decision logic is implemented correctly.",
            "The hypothesis decision logic is incorrect."
        )


# Run every test case defined in this module when the file is executed directly.
if __name__ == "__main__":
    unittest.main()


test_code.py

Pythonを使用して統計学の基礎をしっかりと築きます。必須の統計概念を学び、NumPyやpandasを通じて実践的に応用します。平均や分散などの基本的な指標から、仮説検定、信頼区間、データ駆動型の洞察まで、ハンズオンで習得します。

データ型、代表値、サンプルと母集団の主な違いなど、統計学の基本原則を学びます。

Pythonを使用して平均値、中央値、最頻値を計算し解釈する方法を学習します。pandasを用いて実際のデータセットでこれらの操作を練習します。

分散と標準偏差がデータのばらつきをどのように測定するかを理解します。手動およびPythonツールを使用して両方を計算する方法を学びます。

共分散と相関が変数間の関係をどのように表すかを探求します。Pythonで両方の指標を計算し比較する練習を行います。

信頼区間を習得し、母集団パラメータを推定します。NumPy、pandas、および可視化ライブラリを使用して、実データで区間を計算し解釈します。

仮説検定とt検定の基礎を学習します。データに基づいた意思決定を支援するために、Pythonを用いて検定を設計・実施・解釈する方法を理解します。

対応のあるt検定

解答