import matplotlib.pyplot as plt
%matplotlib inline

# Three example series (A, B, C); each holds one value per label in X.
X = ["First year", "Second year", "Third year"]
A = [52, 57, 90]
B = [44, 70, 75]
C = [67, 80, 95]

# Draw every series as unconnected markers (linewidth=0) over the
# categorical x axis, then add the legend.
for series, symbol, name in ((A, "o", "A"), (B, "x", "B"), (C, "^", "C")):
    plt.plot(X, series, linewidth=0, marker=symbol, label=name)
plt.legend(loc="upper left")

<matplotlib.legend.Legend at 0x7fdbbf71ff28>

# Numeric x positions 1..3 replace the text labels; ticks sit exactly on them.
Y = [1, 2, 3]
plt.xticks(Y)

# Same three series as before, this time with the markers joined by lines.
for series, symbol, name in zip((A, B, C), "ox^", "ABC"):
    plt.plot(Y, series, marker=symbol, label=name)
plt.legend(loc="upper left")

<matplotlib.legend.Legend at 0x7fdbbf79c208>

import numpy as np
# ``scipy.polyfit`` was only a re-export of NumPy's function.  That alias has
# been deprecated since SciPy 1.4 and is not available in recent SciPy
# releases, so the name is imported from NumPy directly.
from numpy import polyfit

X = [1, 2, 3]
Coef = []   # fitted slopes, appended in the order A, B, C
Inter = []  # fitted intercepts, same order

# Dense x grid for drawing the fitted lines.  linspace includes the end
# point, so the lines reach x = 3 (np.arange(1, 3, 0.01) stopped at 2.99).
x = np.linspace(1, 3, 201)

plt.xticks(X)
for scores, symbol, name, line_color in (
    (A, "o", "A", "blue"),
    (B, "x", "B", "orange"),
    (C, "^", "C", "green"),
):
    # Degree-1 least-squares fit; polyfit returns the highest power first,
    # i.e. (slope, intercept).
    a, b = polyfit(X, scores, 1)
    Coef.append(a)
    Inter.append(b)

    # Observed values as bare markers, fitted line drawn on top.
    y = a * x + b
    plt.plot(X, scores, linewidth=0, marker=symbol, label=name)
    plt.plot(x, y, color=line_color)
plt.legend(loc="upper left")

<matplotlib.legend.Legend at 0x7fdbbf8d0f28>

# Difference in "growth": the fitted slopes collected for A, B and C
Coef

[18.999999999999986, 15.5, 13.999999999999993]

# Correlation coefficient between the intercepts and the slopes
np.corrcoef(Coef,Inter)

array([[ 1.        , -0.81944417],
       [-0.81944417,  1.        ]])

import pandas as pd

# Load Group A's table; the first CSV column is used as the row index.
group_A = pd.read_csv("../DATA02/GroupA.csv",index_col=0)
# Preview the first rows of Group A.
group_A.head()

# Load Group B's table the same way.
group_B = pd.read_csv("../DATA02/GroupB.csv",index_col=0)
# Preview the first rows of Group B.
group_B.head()

# Summary statistics for each column of Group A.
group_A.describe()

# Summary statistics for each column of Group B.
group_B.describe()

import numpy as np
# ``scipy.polyfit`` was only a re-export of NumPy's function.  That alias has
# been deprecated since SciPy 1.4 and is not available in recent SciPy
# releases, so the name is imported from NumPy directly.
from numpy import polyfit

# Take the data frame's contents as a NumPy array (iterated row by row below)
group_A_values = group_A.values

# Connect the actual scores with lines
Y = [1, 2, 3]
plt.xticks(Y)

for scores in group_A_values:
    plt.plot(Y, scores, c="grey", marker="+")

# Growth curve model: one fitted straight line per row of Group A
X = [1, 2, 3]

# linspace includes the end point, so every fitted line reaches x = 3
# (np.arange(1, 3, 0.01) stopped at 2.99).
x = np.linspace(1, 3, 201)

plt.xticks(X)

for scores in group_A_values:
    # Degree-1 least-squares fit, returned as (slope, intercept).  Called
    # through NumPy because the top-level scipy.polyfit alias is deprecated.
    a, b = np.polyfit(X, scores, 1)
    y = a * x + b
    plt.plot(x, y, color="grey")

# Take Group B's contents out of the data frame as an array
group_B_values = group_B.values

# Draw each row's actual scores as one connected line
Y = list(range(1, 4))
plt.xticks(Y)

for row_scores in group_B_values:
    plt.plot(Y, row_scores, color="grey", marker="+")

# Growth curve model: one fitted straight line per row of Group B
X = [1, 2, 3]

# linspace includes the end point, so every fitted line reaches x = 3
# (np.arange(1, 3, 0.01) stopped at 2.99).
x = np.linspace(1, 3, 201)

plt.xticks(X)
for scores in group_B_values:
    # Degree-1 least-squares fit, returned as (slope, intercept).  Called
    # through NumPy because the top-level scipy.polyfit alias is deprecated.
    a, b = np.polyfit(X, scores, 1)
    y = a * x + b
    plt.plot(x, y, color="grey")

# Store the slope and intercept of the line fitted to each Group A row
Coef_A = []
Inter_A = []
for scores in group_A_values:
    # Degree-1 least-squares fit, returned as (slope, intercept).  Called
    # through NumPy because the top-level scipy.polyfit alias is deprecated.
    # The fitted curve itself is not needed here, so it is not evaluated.
    a, b = np.polyfit(X, scores, 1)
    Coef_A.append(a)
    Inter_A.append(b)

# Mean of the Group A slopes
np.mean(Coef_A)

24.783333333333324

# Standard deviation of the Group A slopes
# (np.std without ddof uses ddof=0, i.e. the population formula)
np.std(Coef_A)

6.4493970433074

# Correlation coefficient between the Group A slopes and intercepts
np.corrcoef(Coef_A,Inter_A)

array([[ 1.        , -0.05863524],
       [-0.05863524,  1.        ]])

# Store the slope and intercept of the line fitted to each Group B row
Coef_B = []
Inter_B = []
for scores in group_B_values:
    # Degree-1 least-squares fit, returned as (slope, intercept).  Called
    # through NumPy because the top-level scipy.polyfit alias is deprecated.
    # The fitted curve itself is not needed here, so it is not evaluated.
    a, b = np.polyfit(X, scores, 1)
    Coef_B.append(a)
    Inter_B.append(b)

# Mean of the Group B slopes
np.mean(Coef_B)

11.083333333333325

# Standard deviation of the Group B slopes
# (np.std without ddof uses ddof=0, i.e. the population formula)
np.std(Coef_B)

13.329843293235758

# Correlation coefficient between the Group B slopes and intercepts
np.corrcoef(Coef_B,Inter_B)

array([[ 1.        , -0.73057181],
       [-0.73057181,  1.        ]])

	first_year	second_year	third_year
S001	86	101	155
S002	98	133	133
S003	71	101	133
S004	60	88	117
S005	24	55	65

	first_year	second_year	third_year
T001	75	98	123
T002	86	111	140
T003	50	81	101
T004	55	69	111
T005	70	29	34

	first_year	second_year	third_year
count	30.000000	30.000000	30.000000
mean	61.066667	82.100000	110.633333
std	18.247217	24.489759	25.910301
min	24.000000	44.000000	65.000000
25%	50.000000	61.750000	94.500000
50%	57.000000	76.500000	111.500000
75%	71.750000	100.500000	123.750000
max	98.000000	133.000000	171.000000

	first_year	second_year	third_year
count	30.000000	30.000000	30.000000
mean	68.366667	69.000000	90.533333
std	17.289589	22.777787	26.887676
min	33.000000	29.000000	34.000000
25%	53.500000	55.000000	71.000000
50%	67.500000	68.000000	90.000000
75%	84.250000	84.250000	106.000000
max	96.000000	111.000000	140.000000

教育データ分析入門2 第2回

2.1 縦断データ

2.2 成長曲線モデル

2.3 集団の変化

練習問題