pandasで、
ValueError: Cannot set a frame with no defined index and a value that cannot be converted to a Series
というエラーが出ることがある。
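結論をいうと、これは空のDataFrameに対してapply(axis=1)を実行し、その結果を新しい列として代入したときに発生するエラーである(今回のケースでは、空のDataFrameに対するapplyがSeriesではなく元と同じ3列を持つ空のDataFrameを返すため、1つの列として代入できない)。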
それなら「エラー:DataFrameが空だよ!」とか言ってくれればよいのだが、エラーメッセージから原因を推測しづらいので厄介だ。
空の(0行の)DataFrameにapplyをするとエラーになる
import pandas as pd
pd.__version__
'0.25.3'
適当にサンプルデータを作ります。
# Sample data: five people with height in metres and weight in kilograms.
df = pd.DataFrame(
    {
        "name": ["Alice", "Bob", "Charlie", "David", "Eve"],
        "height": [1.66, 1.68, 1.70, 1.72, 1.75],
        "weight": [50, 55, 65, 75, 80],
    }
)
df
| | name | height | weight |
---|---|---|---|
0 | Alice | 1.66 | 50 |
1 | Bob | 1.68 | 55 |
2 | Charlie | 1.70 | 65 |
3 | David | 1.72 | 75 |
4 | Eve | 1.75 | 80 |
このDataFrameにapplyを使って新しい列を追加しよう。
こういうとき、Qiitaの記事「pandasで条件分岐(case when的な)によるデータ加工を網羅したい」にはだいぶお世話になっている。
pandasで複数の列から新しい列を作りたいときにapply関数を使うが、覚えきれないので毎回参照している気がする。
今回は身長と体重を元にBMIを作って、BMIの値を元に体型が「やせ/標準/肥満」のどれなのかを返してみよう。
上の記事の中の「複数変数が条件分岐の対象&カテゴリ化したい」に該当する。
# Definition of thin / normal / overweight:
# https://www.e-healthnet.mhlw.go.jp/information/dictionary/metabolic/ym-002.html
def calc_shape(x):
    """Classify a body shape from the BMI of one record.

    Args:
        x: An object exposing ``weight`` and ``height`` attributes, e.g. a
            DataFrame row passed in by ``df.apply(..., axis=1)``. BMI is
            computed as ``weight / height ** 2``.

    Returns:
        ``"Overweight"`` when BMI >= 25, ``"Normal"`` when BMI >= 18.5,
        otherwise ``"Thin"``.
    """
    bmi = x.weight / (x.height ** 2)
    if bmi >= 25:
        return "Overweight"
    elif bmi >= 18.5:
        return "Normal"
    else:
        return "Thin"
# Apply calc_shape to each row (axis=1) and store the result in a new "body_shape" column.
df["body_shape"] = df.apply(lambda x: calc_shape(x), axis=1)
df
| | name | height | weight | body_shape |
---|---|---|---|---|
0 | Alice | 1.66 | 50 | Thin |
1 | Bob | 1.68 | 55 | Normal |
2 | Charlie | 1.70 | 65 | Normal |
3 | David | 1.72 | 75 | Overweight |
4 | Eve | 1.75 | 80 | Overweight |
これは問題ない。
問題はここから先だ。applyを適用する前に色々選択や抽出の処理があって、DataFrameが空だった場合、エラーが発生する。
# Rebuild the sample data from scratch.
df = pd.DataFrame({
    'name': ['Alice', 'Bob', 'Charlie', 'David', 'Eve'],
    'height': [1.66, 1.68, 1.70, 1.72, 1.75],
    'weight': [50, 55, 65, 75, 80],
})
# Filter on a name that does not exist, so the result has columns but zero rows.
df2 = df[df["name"] == "Foo"]
df2 # 空のデータ
| | name | height | weight |
---|---|---|---|
# Intentionally reproduces the error: df2 has zero rows, and assigning the
# apply() result to a new column raises the ValueError shown in the traceback below.
df2["body_shape"] = df2.apply(lambda x: calc_shape(x), axis=1)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
/usr/local/lib/python3.7/site-packages/pandas/core/frame.py in _ensure_valid_index(self, value)
3539 try:
-> 3540 value = Series(value)
3541 except (ValueError, NotImplementedError, TypeError):
/usr/local/lib/python3.7/site-packages/pandas/core/series.py in __init__(self, data, index, dtype, name, copy, fastpath)
315
--> 316 data = SingleBlockManager(data, index, fastpath=True)
317
/usr/local/lib/python3.7/site-packages/pandas/core/internals/managers.py in __init__(self, block, axis, do_integrity_check, fastpath)
1515 if not isinstance(block, Block):
-> 1516 block = make_block(block, placement=slice(0, len(axis)), ndim=1)
1517
/usr/local/lib/python3.7/site-packages/pandas/core/internals/blocks.py in make_block(values, placement, klass, ndim, dtype, fastpath)
3283
-> 3284 return klass(values, ndim=ndim, placement=placement)
3285
/usr/local/lib/python3.7/site-packages/pandas/core/internals/blocks.py in __init__(self, values, placement, ndim)
2791
-> 2792 super().__init__(values, ndim=ndim, placement=placement)
2793
/usr/local/lib/python3.7/site-packages/pandas/core/internals/blocks.py in __init__(self, values, placement, ndim)
127 "Wrong number of items passed {val}, placement implies "
--> 128 "{mgr}".format(val=len(self.values), mgr=len(self.mgr_locs))
129 )
ValueError: Wrong number of items passed 3, placement implies 0
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
<ipython-input-10-7f0bdf7b09d6> in <module>
----> 1 df2["body_shape"] = df2.apply(lambda x: calc_shape(x), axis=1)
/usr/local/lib/python3.7/site-packages/pandas/core/frame.py in __setitem__(self, key, value)
3485 else:
3486 # set column
-> 3487 self._set_item(key, value)
3488
3489 def _setitem_slice(self, key, value):
/usr/local/lib/python3.7/site-packages/pandas/core/frame.py in _set_item(self, key, value)
3561 """
3562
-> 3563 self._ensure_valid_index(value)
3564 value = self._sanitize_column(key, value)
3565 NDFrame._set_item(self, key, value)
/usr/local/lib/python3.7/site-packages/pandas/core/frame.py in _ensure_valid_index(self, value)
3541 except (ValueError, NotImplementedError, TypeError):
3542 raise ValueError(
-> 3543 "Cannot set a frame with no defined index "
3544 "and a value that cannot be converted to a "
3545 "Series"
ValueError: Cannot set a frame with no defined index and a value that cannot be converted to a Series
type(df2)
pandas.core.frame.DataFrame
対処法としては、DataFrameが空でないことを判定してからapplyを適用すればよい。
df2.empty
True
len(df2)==0
True
このあたりを使えばよいだろう。たとえば `if not df2.empty:` で囲んでからapplyの結果を代入するようにすれば、DataFrameが空のときは処理がスキップされ、エラーにならない。
……ここまでの内容はほとんど、このStackOverflowに書いてあります。
https://stackoverflow.com/questions/48306694/valueerror-cannot-set-a-frame-with-no-defined-index-and-a-value-that-cannot-be
実はpandas 1.0.1では、同じ操作をしたときのエラーメッセージが変わる。
pandas1系になると挙動が変わるのね……メジャーアップデートは結構変更が起きるんかな……
エラーになるのは同じだけど、エラーメッセージが違ったので、こちらも載せておく。
import pandas as pd
pd.__version__
'1.0.1'
# Rebuild the sample data from scratch.
df = pd.DataFrame({
    'name': ['Alice', 'Bob', 'Charlie', 'David', 'Eve'],
    'height': [1.66, 1.68, 1.70, 1.72, 1.75],
    'weight': [50, 55, 65, 75, 80],
})
# Filter on a name that does not exist, so the result has columns but zero rows.
df2 = df[df["name"] == "Foo"]
df2
| | name | height | weight |
---|---|---|---|
# Intentionally reproduces the error on pandas 1.0.1: df2 has zero rows, and the
# assignment fails with "Wrong number of items passed 3, placement implies 1" (traceback below).
df2["body_shape"] = df2.apply(lambda x: calc_shape(x), axis=1)
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
/usr/local/lib/python3.7/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2645 try:
-> 2646 return self._engine.get_loc(key)
2647 except KeyError:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'body_shape'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
/usr/local/lib/python3.7/site-packages/pandas/core/internals/managers.py in set(self, item, value)
1070 try:
-> 1071 loc = self.items.get_loc(item)
1072 except KeyError:
/usr/local/lib/python3.7/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2647 except KeyError:
-> 2648 return self._engine.get_loc(self._maybe_cast_indexer(key))
2649 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'body_shape'
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
<ipython-input-10-7f0bdf7b09d6> in <module>
----> 1 df2["body_shape"] = df2.apply(lambda x: calc_shape(x), axis=1)
/usr/local/lib/python3.7/site-packages/pandas/core/frame.py in __setitem__(self, key, value)
2936 else:
2937 # set column
-> 2938 self._set_item(key, value)
2939
2940 def _setitem_slice(self, key, value):
/usr/local/lib/python3.7/site-packages/pandas/core/frame.py in _set_item(self, key, value)
2999 self._ensure_valid_index(value)
3000 value = self._sanitize_column(key, value)
-> 3001 NDFrame._set_item(self, key, value)
3002
3003 # check if we are modifying a copy
/usr/local/lib/python3.7/site-packages/pandas/core/generic.py in _set_item(self, key, value)
3622
3623 def _set_item(self, key, value) -> None:
-> 3624 self._data.set(key, value)
3625 self._clear_item_cache()
3626
/usr/local/lib/python3.7/site-packages/pandas/core/internals/managers.py in set(self, item, value)
1072 except KeyError:
1073 # This item wasn't present, just insert at end
-> 1074 self.insert(len(self.items), item, value)
1075 return
1076
/usr/local/lib/python3.7/site-packages/pandas/core/internals/managers.py in insert(self, loc, item, value, allow_duplicates)
1179 new_axis = self.items.insert(loc, item)
1180
-> 1181 block = make_block(values=value, ndim=self.ndim, placement=slice(loc, loc + 1))
1182
1183 for blkno, count in _fast_count_smallints(self._blknos[loc:]):
/usr/local/lib/python3.7/site-packages/pandas/core/internals/blocks.py in make_block(values, placement, klass, ndim, dtype)
3039 values = DatetimeArray._simple_new(values, dtype=dtype)
3040
-> 3041 return klass(values, ndim=ndim, placement=placement)
3042
3043
/usr/local/lib/python3.7/site-packages/pandas/core/internals/blocks.py in __init__(self, values, placement, ndim)
2587 values = np.array(values, dtype=object)
2588
-> 2589 super().__init__(values, ndim=ndim, placement=placement)
2590
2591 @property
/usr/local/lib/python3.7/site-packages/pandas/core/internals/blocks.py in __init__(self, values, placement, ndim)
123 if self._validate_ndim and self.ndim and len(self.mgr_locs) != len(self.values):
124 raise ValueError(
--> 125 f"Wrong number of items passed {len(self.values)}, "
126 f"placement implies {len(self.mgr_locs)}"
127 )
ValueError: Wrong number of items passed 3, placement implies 1
それでは。