# Q7 (Program_Language): column numbers 8~20, minus "others".
# NOTE(review): columns[7:20] spans 13 columns -- confirm that this range
# really leaves out the "others" column as the header above states.
# Tally every Q7 column exactly once; [1:] skips the first row of each column
# (presumably the question-text row of the survey export -- confirm).
_q7_tallies_jp21 = [df21_Jp[col][1:].value_counts() for col in df21_Jp.columns[7:20]]
# value_counts() drops NaN, so a column nobody answered produces an empty
# tally; taking element [0] of it raises
# "IndexError: index 0 is out of bounds for axis 0 with size 0".
# Such columns are skipped instead of aborting the whole cell.
_q7_top_jp21 = [
    (tally.index[0], tally.values[0])
    for tally in _q7_tallies_jp21
    if not tally.empty
]
# One row per answered column: its most frequent value and that value's count.
df21_Jp_PL = pd.DataFrame(_q7_top_jp21, columns=['Program_Language', 'counts'])
# Q7 (Program_Language): column numbers 8~20, minus "others".
# NOTE(review): columns[7:20] spans 13 columns -- confirm that this range
# really leaves out the "others" column as the header above states.
# Tally every Q7 column exactly once; [1:] skips the first row of each column
# (presumably the question-text row of the survey export -- confirm).
_q7_tallies_ch21 = [df21_Ch[col][1:].value_counts() for col in df21_Ch.columns[7:20]]
# value_counts() drops NaN, so a column nobody answered produces an empty
# tally; taking element [0] of it raises
# "IndexError: index 0 is out of bounds for axis 0 with size 0".
# Such columns are skipped instead of aborting the whole cell.
_q7_top_ch21 = [
    (tally.index[0], tally.values[0])
    for tally in _q7_tallies_ch21
    if not tally.empty
]
# One row per answered column: its most frequent value and that value's count.
df21_Ch_PL = pd.DataFrame(_q7_top_ch21, columns=['Program_Language', 'counts'])
# Put the previously removed country column, with its value, back into each
# per-country table as the first column. The original note also mentions
# merging the tables; only the insertion happens in this cell.
df21_Jp_PL.insert(loc=0, column='Country', value='Japan')
df21_Ch_PL.insert(loc=0, column='Country', value='China')
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-5-89d86f0a4d0b> in <module>
11 ## Q7(Program_Language): 칼럼번호 8~20 - others
12 df21_Ch_PL = pd.DataFrame()
---> 13 df21_Ch_PL['Program_Language'] = [df21_Ch[col][1:].value_counts() .index[0] for col in df21_Ch.columns[7:20]]
14 df21_Ch_PL['counts'] = [df21_Ch[col][1:].value_counts() .values[0] for col in df21_Ch.columns[7:20]]
15
<ipython-input-5-89d86f0a4d0b> in <listcomp>(.0)
11 ## Q7(Program_Language): 칼럼번호 8~20 - others
12 df21_Ch_PL = pd.DataFrame()
---> 13 df21_Ch_PL['Program_Language'] = [df21_Ch[col][1:].value_counts() .index[0] for col in df21_Ch.columns[7:20]]
14 df21_Ch_PL['counts'] = [df21_Ch[col][1:].value_counts() .values[0] for col in df21_Ch.columns[7:20]]
15
E:\Sadness\anaconda3\lib\site-packages\pandas\core\indexes\base.py in __getitem__(self, key)
4295 if is_scalar(key):
4296 key = com.cast_scalar_indexer(key, warn_float=True)
-> 4297 return getitem(key)
4298
4299 if isinstance(key, slice):
IndexError: index 0 is out of bounds for axis 0 with size 0
결측 column 식별 및 제거
`IndexError: index 0 is out of bounds for axis 0 with size 0` 오류가 발생했다. 아마 China 데이터의 Program_Language 문항 중 응답이 하나도 없는 칼럼이 있어서, 비어 있는 `value_counts()` 결과에 `.index[0]`으로 접근했기 때문에 발생한 것 같다.
# Q7 (Program_Language): column numbers 8~20, minus "others".
df21_Jp_PL = pd.DataFrame()
jp21_languages = []
jp21_counts = []
for q7_col in df21_Jp.columns[7:19]:
    # One tally per column; [1:] skips the first row of the column.
    q7_tally = df21_Jp[q7_col][1:].value_counts()
    # value_counts() sorts by frequency, so element 0 is the most common
    # value and its count.
    jp21_languages.append(q7_tally.index[0])
    jp21_counts.append(q7_tally.values[0])
df21_Jp_PL['Program_Language'] = jp21_languages
df21_Jp_PL['counts'] = jp21_counts
# Q7 (Program_Language): column numbers 8~20, minus "others" and minus
# Q7_Part12 (None).
# df21_Ch_rmQ07P12 is created outside this cell (presumably df21_Ch with the
# Q7_Part_12 column dropped -- confirm).
df21_Ch_PL = pd.DataFrame()
ch21_languages = []
ch21_counts = []
for q7_col in df21_Ch_rmQ07P12.columns[7:18]:
    # One tally per column; [1:] skips the first row of the column.
    q7_tally = df21_Ch_rmQ07P12[q7_col][1:].value_counts()
    # value_counts() sorts by frequency, so element 0 is the most common
    # value and its count.
    ch21_languages.append(q7_tally.index[0])
    ch21_counts.append(q7_tally.values[0])
df21_Ch_PL['Program_Language'] = ch21_languages
df21_Ch_PL['counts'] = ch21_counts
# Put the previously removed country column, with its value, back into each
# per-country table as the first column. The original note also mentions
# merging the tables; only the insertion happens in this cell.
df21_Jp_PL.insert(loc=0, column='Country', value='Japan')
df21_Ch_PL.insert(loc=0, column='Country', value='China')
# Q7 (Program_Language): column numbers 8~20, minus "others".
df21_Jp_PL = pd.DataFrame()
jp21_languages = []
jp21_counts = []
for q7_col in df21_Jp.columns[7:19]:
    # One tally per column; [1:] skips the first row of the column.
    q7_tally = df21_Jp[q7_col][1:].value_counts()
    # value_counts() sorts by frequency, so element 0 is the most common
    # value and its count.
    jp21_languages.append(q7_tally.index[0])
    jp21_counts.append(q7_tally.values[0])
df21_Jp_PL['Program_Language'] = jp21_languages
df21_Jp_PL['counts'] = jp21_counts
# Q7 (Program_Language): column numbers 8~20, minus "others" and minus
# Q7_Part12 (None).
# df21_Ch_rmQ07P12 is created outside this cell (presumably df21_Ch with the
# Q7_Part_12 column dropped -- confirm).
df21_Ch_PL = pd.DataFrame()
ch21_languages = []
ch21_counts = []
for q7_col in df21_Ch_rmQ07P12.columns[7:18]:
    # One tally per column; [1:] skips the first row of the column.
    q7_tally = df21_Ch_rmQ07P12[q7_col][1:].value_counts()
    # value_counts() sorts by frequency, so element 0 is the most common
    # value and its count.
    ch21_languages.append(q7_tally.index[0])
    ch21_counts.append(q7_tally.values[0])
df21_Ch_PL['Program_Language'] = ch21_languages
df21_Ch_PL['counts'] = ch21_counts
# Put the previously removed country column, with its value, back into each
# per-country table as the first column. The original note also mentions
# merging the tables; only the insertion happens in this cell.
df21_Jp_PL.insert(loc=0, column='Country', value='Japan')
df21_Ch_PL.insert(loc=0, column='Country', value='China')
# Q18 (Program_Language): column numbers 83~95, minus "others" and
# "other (text)".
df19_Jp_PL = pd.DataFrame()
jp19_languages = []
jp19_counts = []
for q18_col in df19_Jp.columns[82:93]:
    # One tally per column; [1:] skips the first row of the column.
    q18_tally = df19_Jp[q18_col][1:].value_counts()
    # value_counts() sorts by frequency, so element 0 is the most common
    # value and its count.
    jp19_languages.append(q18_tally.index[0])
    jp19_counts.append(q18_tally.values[0])
df19_Jp_PL['Program_Language'] = jp19_languages
df19_Jp_PL['counts'] = jp19_counts
# 2019 China: drop the missing-value column Q18_Part_11 (None).
# drop() returns a new frame here, so df19_Ch itself keeps the column.
df19_Ch_rmQ18P11 = df19_Ch.drop(columns=['Q18_Part_11'])
# Q18 (Program_Language): column numbers 83~95, minus "others" and
# "other (text)", and minus Q18_Part11 (None).
df19_Ch_PL = pd.DataFrame()
ch19_languages = []
ch19_counts = []
for q18_col in df19_Ch_rmQ18P11.columns[82:92]:
    # One tally per column; [1:] skips the first row of the column.
    q18_tally = df19_Ch_rmQ18P11[q18_col][1:].value_counts()
    # value_counts() sorts by frequency, so element 0 is the most common
    # value and its count.
    ch19_languages.append(q18_tally.index[0])
    ch19_counts.append(q18_tally.values[0])
df19_Ch_PL['Program_Language'] = ch19_languages
df19_Ch_PL['counts'] = ch19_counts
앞으로 많은 그래프를 그릴 것이고, 그때마다 `df_Jp['Q2'][1:].value_counts()` 형식의 코드가 반복된다.
`df_Jp['Q2'][1:].value_counts()`를 객체로 만들어서 넣어도 되겠지만, 이번 작업에서 사용할 DataFrame은 `df_Jp`와 `df_Ch` 2개여서 DataFrame 객체가 바뀌고, 칼럼명도 Q1, Q2 등으로 바뀐다. 위 조건에 부합하는 간단한 함수를 하나 만들겠다.