第5回の練習問題の解答例

../DATA01/IEDA05_03.csvには、92人の受験者が60問に回答した結果が、正解を1、不正解を0として入力されています。受験者を上位群、中位群、下位群に分け、項目ごとに群ごとの正解率を算出し、Q1、Q5、Q15の項目特性曲線を描画しなさい。

In [1]:
# パッケージのimport
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
In [4]:
# Load the 0/1 answer matrix; the first CSV column (examinee ID) is used as the row index.
data = pd.read_csv(
    "../DATA01/IEDA05_03.csv",
    index_col=0,
)
data.head()
Out[4]:
Q1 Q2 Q3 Q4 Q5 Q6 Q7 Q8 Q9 Q10 ... Q51 Q52 Q53 Q54 Q55 Q56 Q57 Q58 Q59 Q60
S001 1 1 1 1 1 1 1 1 1 1 ... 0 1 0 0 1 1 1 0 1 1
S002 1 1 1 0 1 1 1 1 1 1 ... 0 1 1 0 1 0 0 0 0 0
S003 0 1 1 1 1 1 0 1 1 1 ... 1 1 0 0 1 1 1 0 0 0
S004 1 1 1 1 0 1 1 1 1 1 ... 1 1 0 1 1 1 1 0 1 1
S005 1 0 1 1 1 1 1 1 1 1 ... 0 1 0 1 0 0 0 0 0 0

5 rows × 60 columns

In [3]:
# Preview the per-examinee total score next to the answers WITHOUT mutating `data`.
# Writing the total into `data` in place would make later row sums (and the
# per-group means) pick up the extra column when the notebook is run top to bottom.
# Only the item columns (Q1, Q2, ...) are summed so helper columns never leak in.
data.assign(total=data.filter(regex=r"^Q\d+$").sum(axis=1)).head()
Out[3]:
Q1 Q2 Q3 Q4 Q5 Q6 Q7 Q8 Q9 Q10 ... Q52 Q53 Q54 Q55 Q56 Q57 Q58 Q59 Q60 total
S001 1 1 1 1 1 1 1 1 1 1 ... 1 0 0 1 1 1 0 1 1 44
S002 1 1 1 0 1 1 1 1 1 1 ... 1 1 0 1 0 0 0 0 0 32
S003 0 1 1 1 1 1 0 1 1 1 ... 1 0 0 1 1 1 0 0 0 37
S004 1 1 1 1 0 1 1 1 1 1 ... 1 0 1 1 1 1 0 1 1 44
S005 1 0 1 1 1 1 1 1 1 1 ... 1 0 1 0 0 0 0 0 0 40

5 rows × 61 columns

In [5]:
# Total score per examinee = number of correct answers across the item columns.
# Restrict the sum to the item columns (Q1, Q2, ...): `data` may also carry helper
# columns such as "total" or "GP" if other cells have been executed, and including
# them would inflate the score.
s_total = data.filter(regex=r"^Q\d+$").sum(axis=1).rename("total")
s_total
Out[5]:
S001    44
S002    32
S003    37
S004    44
S005    40
        ..
S088    42
S089    44
S090    46
S091    29
S092    38
Name: total, Length: 92, dtype: int64
In [7]:
# Split examinees into three groups by total score, using the quartiles as cut points:
#   3 = upper group  (strictly above the 75th percentile)
#   1 = lower group  (strictly below the 25th percentile)
#   2 = middle group (everything else, including scores equal to a cut point)
upper = s_total.quantile(.75)
lower = s_total.quantile(.25)

G = [
    3 if score > upper else (1 if score < lower else 2)
    for score in s_total
]
In [8]:
# Attach the group labels (1 = lower, 2 = middle, 3 = upper) as a new "GP" column.
# G was built by iterating over s_total, so it is in the same row order as `data`.
# NOTE: this modifies `data` in place.
data["GP"] = G
In [9]:
# Quick look at the first rows to confirm the GP column is present.
data.head(n=5)
Out[9]:
Q1 Q2 Q3 Q4 Q5 Q6 Q7 Q8 Q9 Q10 ... Q52 Q53 Q54 Q55 Q56 Q57 Q58 Q59 Q60 GP
S001 1 1 1 1 1 1 1 1 1 1 ... 1 0 0 1 1 1 0 1 1 2
S002 1 1 1 0 1 1 1 1 1 1 ... 1 1 0 1 0 0 0 0 0 1
S003 0 1 1 1 1 1 0 1 1 1 ... 1 0 0 1 1 1 0 0 0 2
S004 1 1 1 1 0 1 1 1 1 1 ... 1 0 1 1 1 1 0 1 1 2
S005 1 0 1 1 1 1 1 1 1 1 ... 1 0 1 0 0 0 0 0 0 2

5 rows × 61 columns

In [10]:
# Because answers are coded 0/1, the column mean within each group is that
# group's correct-answer rate for the item.
data.groupby("GP").agg("mean")
Out[10]:
Q1 Q2 Q3 Q4 Q5 Q6 Q7 Q8 Q9 Q10 ... Q51 Q52 Q53 Q54 Q55 Q56 Q57 Q58 Q59 Q60
GP
1 0.909091 0.545455 0.863636 0.545455 0.727273 0.909091 0.772727 0.727273 0.500000 0.818182 ... 0.272727 0.363636 0.227273 0.272727 0.409091 0.409091 0.409091 0.181818 0.181818 0.272727
2 0.938776 0.795918 0.959184 0.673469 0.918367 0.979592 0.918367 0.897959 0.877551 0.938776 ... 0.387755 0.734694 0.244898 0.387755 0.653061 0.857143 0.755102 0.306122 0.367347 0.571429
3 0.952381 0.952381 0.904762 0.952381 1.000000 1.000000 0.952381 0.952381 1.000000 1.000000 ... 0.476190 0.857143 0.476190 0.190476 0.857143 0.952381 0.952381 0.476190 0.523810 0.666667

3 rows × 60 columns

In [11]:
# Item characteristic curves for the items requested in the exercise: Q1, Q5 and Q15.
# The columns are selected with a list; selecting several groupby columns with a
# bare tuple (["Q1","Q5","Q15"] without the inner brackets) is no longer accepted
# by current pandas.
ax = (
    data.groupby("GP")[["Q1", "Q5", "Q15"]]
    .mean()
    .plot(kind="line", marker="o")
)
ax.set_xticks([1, 2, 3])  # group codes: 1 = lower, 2 = middle, 3 = upper
ax.set_xlabel("Group (1 = lower, 2 = middle, 3 = upper)")
ax.set_ylabel("Proportion correct")
ax.set_title("Item characteristic curves: Q1, Q5, Q15")
ax
Out[11]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fbd87a1d2e8>
No description has been provided for this image