tests/reshape/test_concat.py · alkaline-ml/pandas

alkaline-ml / pandas python

Repository URL to install this package:
Version: 1.1.1

/ tests / reshape / test_concat.py

from collections import OrderedDict, abc, deque
import datetime as dt
from datetime import datetime
from decimal import Decimal
from io import StringIO
from itertools import combinations
from warnings import catch_warnings

import dateutil
import numpy as np
from numpy.random import randn
import pytest

from pandas.core.dtypes.dtypes import CategoricalDtype

import pandas as pd
from pandas import (
    Categorical,
    DataFrame,
    DatetimeIndex,
    Index,
    MultiIndex,
    Series,
    Timestamp,
    concat,
    date_range,
    isna,
    read_csv,
)
import pandas._testing as tm
from pandas.core.arrays import SparseArray
from pandas.core.construction import create_series_with_explicit_dtype
from pandas.tests.extension.decimal import to_decimal


@pytest.fixture(params=[True, False])
def sort(request):
    """Boolean sort keyword for concat and DataFrame.append."""
    return request.param


class TestConcatAppendCommon:
    """
    Test common dtype coercion rules between concat and append.
    """

    def setup_method(self, method):

        dt_data = [
            pd.Timestamp("2011-01-01"),
            pd.Timestamp("2011-01-02"),
            pd.Timestamp("2011-01-03"),
        ]
        tz_data = [
            pd.Timestamp("2011-01-01", tz="US/Eastern"),
            pd.Timestamp("2011-01-02", tz="US/Eastern"),
            pd.Timestamp("2011-01-03", tz="US/Eastern"),
        ]

        td_data = [
            pd.Timedelta("1 days"),
            pd.Timedelta("2 days"),
            pd.Timedelta("3 days"),
        ]

        period_data = [
            pd.Period("2011-01", freq="M"),
            pd.Period("2011-02", freq="M"),
            pd.Period("2011-03", freq="M"),
        ]

        self.data = {
            "bool": [True, False, True],
            "int64": [1, 2, 3],
            "float64": [1.1, np.nan, 3.3],
            "category": pd.Categorical(["X", "Y", "Z"]),
            "object": ["a", "b", "c"],
            "datetime64[ns]": dt_data,
            "datetime64[ns, US/Eastern]": tz_data,
            "timedelta64[ns]": td_data,
            "period[M]": period_data,
        }

    def _check_expected_dtype(self, obj, label):
        """
        Check whether obj has expected dtype depending on label
        considering not-supported dtypes
        """
        if isinstance(obj, pd.Index):
            if label == "bool":
                assert obj.dtype == "object"
            else:
                assert obj.dtype == label
        elif isinstance(obj, pd.Series):
            if label.startswith("period"):
                assert obj.dtype == "Period[M]"
            else:
                assert obj.dtype == label
        else:
            raise ValueError

    def test_dtypes(self):
        # to confirm test case covers intended dtypes
        for typ, vals in self.data.items():
            self._check_expected_dtype(pd.Index(vals), typ)
            self._check_expected_dtype(pd.Series(vals), typ)

    def test_concatlike_same_dtypes(self):
        # GH 13660
        for typ1, vals1 in self.data.items():

            vals2 = vals1
            vals3 = vals1

            if typ1 == "category":
                exp_data = pd.Categorical(list(vals1) + list(vals2))
                exp_data3 = pd.Categorical(list(vals1) + list(vals2) + list(vals3))
            else:
                exp_data = vals1 + vals2
                exp_data3 = vals1 + vals2 + vals3

            # ----- Index ----- #

            # index.append
            res = pd.Index(vals1).append(pd.Index(vals2))
            exp = pd.Index(exp_data)
            tm.assert_index_equal(res, exp)

            # 3 elements
            res = pd.Index(vals1).append([pd.Index(vals2), pd.Index(vals3)])
            exp = pd.Index(exp_data3)
            tm.assert_index_equal(res, exp)

            # index.append name mismatch
            i1 = pd.Index(vals1, name="x")
            i2 = pd.Index(vals2, name="y")
            res = i1.append(i2)
            exp = pd.Index(exp_data)
            tm.assert_index_equal(res, exp)

            # index.append name match
            i1 = pd.Index(vals1, name="x")
            i2 = pd.Index(vals2, name="x")
            res = i1.append(i2)
            exp = pd.Index(exp_data, name="x")
            tm.assert_index_equal(res, exp)

            # cannot append non-index
            with pytest.raises(TypeError, match="all inputs must be Index"):
                pd.Index(vals1).append(vals2)

            with pytest.raises(TypeError, match="all inputs must be Index"):
                pd.Index(vals1).append([pd.Index(vals2), vals3])

            # ----- Series ----- #

            # series.append
            res = pd.Series(vals1).append(pd.Series(vals2), ignore_index=True)
            exp = pd.Series(exp_data)
            tm.assert_series_equal(res, exp, check_index_type=True)

            # concat
            res = pd.concat([pd.Series(vals1), pd.Series(vals2)], ignore_index=True)
            tm.assert_series_equal(res, exp, check_index_type=True)

            # 3 elements
            res = pd.Series(vals1).append(
                [pd.Series(vals2), pd.Series(vals3)], ignore_index=True
            )
            exp = pd.Series(exp_data3)
            tm.assert_series_equal(res, exp)

            res = pd.concat(
                [pd.Series(vals1), pd.Series(vals2), pd.Series(vals3)],
                ignore_index=True,
            )
            tm.assert_series_equal(res, exp)

            # name mismatch
            s1 = pd.Series(vals1, name="x")
            s2 = pd.Series(vals2, name="y")
            res = s1.append(s2, ignore_index=True)
            exp = pd.Series(exp_data)
            tm.assert_series_equal(res, exp, check_index_type=True)

            res = pd.concat([s1, s2], ignore_index=True)
            tm.assert_series_equal(res, exp, check_index_type=True)

            # name match
            s1 = pd.Series(vals1, name="x")
            s2 = pd.Series(vals2, name="x")
            res = s1.append(s2, ignore_index=True)
            exp = pd.Series(exp_data, name="x")
            tm.assert_series_equal(res, exp, check_index_type=True)

            res = pd.concat([s1, s2], ignore_index=True)
            tm.assert_series_equal(res, exp, check_index_type=True)

            # cannot append non-index
            msg = (
                r"cannot concatenate object of type '.+'; "
                "only Series and DataFrame objs are valid"
            )
            with pytest.raises(TypeError, match=msg):
                pd.Series(vals1).append(vals2)

            with pytest.raises(TypeError, match=msg):
                pd.Series(vals1).append([pd.Series(vals2), vals3])

            with pytest.raises(TypeError, match=msg):
                pd.concat([pd.Series(vals1), vals2])

            with pytest.raises(TypeError, match=msg):
                pd.concat([pd.Series(vals1), pd.Series(vals2), vals3])

    def test_concatlike_dtypes_coercion(self):
        # GH 13660
        for typ1, vals1 in self.data.items():
            for typ2, vals2 in self.data.items():

                vals3 = vals2

                # basically infer
                exp_index_dtype = None
                exp_series_dtype = None

                if typ1 == typ2:
                    # same dtype is tested in test_concatlike_same_dtypes
                    continue
                elif typ1 == "category" or typ2 == "category":
                    # TODO: suspicious
                    continue

                # specify expected dtype
                if typ1 == "bool" and typ2 in ("int64", "float64"):
                    # series coerces to numeric based on numpy rule
                    # index doesn't because bool is object dtype
                    exp_series_dtype = typ2
                elif typ2 == "bool" and typ1 in ("int64", "float64"):
                    exp_series_dtype = typ1
                elif (
                    typ1 == "datetime64[ns, US/Eastern]"
                    or typ2 == "datetime64[ns, US/Eastern]"
                    or typ1 == "timedelta64[ns]"
                    or typ2 == "timedelta64[ns]"
                ):
                    exp_index_dtype = object
                    exp_series_dtype = object

                exp_data = vals1 + vals2
                exp_data3 = vals1 + vals2 + vals3

                # ----- Index ----- #

                # index.append
                res = pd.Index(vals1).append(pd.Index(vals2))
                exp = pd.Index(exp_data, dtype=exp_index_dtype)
                tm.assert_index_equal(res, exp)

                # 3 elements
                res = pd.Index(vals1).append([pd.Index(vals2), pd.Index(vals3)])
                exp = pd.Index(exp_data3, dtype=exp_index_dtype)
                tm.assert_index_equal(res, exp)

                # ----- Series ----- #

                # series.append
                res = pd.Series(vals1).append(pd.Series(vals2), ignore_index=True)
                exp = pd.Series(exp_data, dtype=exp_series_dtype)
                tm.assert_series_equal(res, exp, check_index_type=True)

                # concat
                res = pd.concat([pd.Series(vals1), pd.Series(vals2)], ignore_index=True)
                tm.assert_series_equal(res, exp, check_index_type=True)

                # 3 elements
                res = pd.Series(vals1).append(
                    [pd.Series(vals2), pd.Series(vals3)], ignore_index=True
                )
                exp = pd.Series(exp_data3, dtype=exp_series_dtype)
                tm.assert_series_equal(res, exp)

                res = pd.concat(
                    [pd.Series(vals1), pd.Series(vals2), pd.Series(vals3)],
                    ignore_index=True,
                )
                tm.assert_series_equal(res, exp)

    def test_concatlike_common_coerce_to_pandas_object(self):
        # GH 13626
        # result must be Timestamp/Timedelta, not datetime.datetime/timedelta
        dti = pd.DatetimeIndex(["2011-01-01", "2011-01-02"])
        tdi = pd.TimedeltaIndex(["1 days", "2 days"])

        exp = pd.Index(
            [
                pd.Timestamp("2011-01-01"),
                pd.Timestamp("2011-01-02"),
                pd.Timedelta("1 days"),
                pd.Timedelta("2 days"),
            ]
        )

        res = dti.append(tdi)
        tm.assert_index_equal(res, exp)
        assert isinstance(res[0], pd.Timestamp)
        assert isinstance(res[-1], pd.Timedelta)

        dts = pd.Series(dti)
        tds = pd.Series(tdi)
        res = dts.append(tds)
        tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1]))
        assert isinstance(res.iloc[0], pd.Timestamp)
        assert isinstance(res.iloc[-1], pd.Timedelta)

        res = pd.concat([dts, tds])
        tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1]))
        assert isinstance(res.iloc[0], pd.Timestamp)
        assert isinstance(res.iloc[-1], pd.Timedelta)

    def test_concatlike_datetimetz(self, tz_aware_fixture):
        tz = tz_aware_fixture
        # GH 7795
        dti1 = pd.DatetimeIndex(["2011-01-01", "2011-01-02"], tz=tz)
        dti2 = pd.DatetimeIndex(["2012-01-01", "2012-01-02"], tz=tz)

        exp = pd.DatetimeIndex(
            ["2011-01-01", "2011-01-02", "2012-01-01", "2012-01-02"], tz=tz
        )

        res = dti1.append(dti2)
        tm.assert_index_equal(res, exp)

        dts1 = pd.Series(dti1)
        dts2 = pd.Series(dti2)
        res = dts1.append(dts2)
        tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1]))

        res = pd.concat([dts1, dts2])
        tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1]))

    @pytest.mark.parametrize("tz", ["UTC", "US/Eastern", "Asia/Tokyo", "EST5EDT"])
    def test_concatlike_datetimetz_short(self, tz):
        # GH#7795
        ix1 = pd.date_range(start="2014-07-15", end="2014-07-17", freq="D", tz=tz)
        ix2 = pd.DatetimeIndex(["2014-07-11", "2014-07-21"], tz=tz)
        df1 = pd.DataFrame(0, index=ix1, columns=["A", "B"])
        df2 = pd.DataFrame(0, index=ix2, columns=["A", "B"])

        exp_idx = pd.DatetimeIndex(
            ["2014-07-15", "2014-07-16", "2014-07-17", "2014-07-11", "2014-07-21"],
            tz=tz,
        )
        exp = pd.DataFrame(0, index=exp_idx, columns=["A", "B"])

        tm.assert_frame_equal(df1.append(df2), exp)
        tm.assert_frame_equal(pd.concat([df1, df2]), exp)

    def test_concatlike_datetimetz_to_object(self, tz_aware_fixture):
        tz = tz_aware_fixture
        # GH 13660

        # different tz coerces to object
        dti1 = pd.DatetimeIndex(["2011-01-01", "2011-01-02"], tz=tz)
        dti2 = pd.DatetimeIndex(["2012-01-01", "2012-01-02"])

        exp = pd.Index(
            [
                pd.Timestamp("2011-01-01", tz=tz),
                pd.Timestamp("2011-01-02", tz=tz),
                pd.Timestamp("2012-01-01"),
                pd.Timestamp("2012-01-02"),
            ],
            dtype=object,
        )

        res = dti1.append(dti2)
        tm.assert_index_equal(res, exp)

        dts1 = pd.Series(dti1)
        dts2 = pd.Series(dti2)
        res = dts1.append(dts2)
        tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1]))

        res = pd.concat([dts1, dts2])
        tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1]))

        # different tz
        dti3 = pd.DatetimeIndex(["2012-01-01", "2012-01-02"], tz="US/Pacific")

        exp = pd.Index(
            [
                pd.Timestamp("2011-01-01", tz=tz),
                pd.Timestamp("2011-01-02", tz=tz),
                pd.Timestamp("2012-01-01", tz="US/Pacific"),
                pd.Timestamp("2012-01-02", tz="US/Pacific"),
            ],
            dtype=object,
        )

        res = dti1.append(dti3)
        # tm.assert_index_equal(res, exp)

        dts1 = pd.Series(dti1)
        dts3 = pd.Series(dti3)
        res = dts1.append(dts3)
        tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1]))

        res = pd.concat([dts1, dts3])
        tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1]))

    def test_concatlike_common_period(self):
        # GH 13660
        pi1 = pd.PeriodIndex(["2011-01", "2011-02"], freq="M")
        pi2 = pd.PeriodIndex(["2012-01", "2012-02"], freq="M")

        exp = pd.PeriodIndex(["2011-01", "2011-02", "2012-01", "2012-02"], freq="M")

        res = pi1.append(pi2)
        tm.assert_index_equal(res, exp)

        ps1 = pd.Series(pi1)
        ps2 = pd.Series(pi2)
        res = ps1.append(ps2)
        tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1]))

        res = pd.concat([ps1, ps2])
        tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1]))

    def test_concatlike_common_period_diff_freq_to_object(self):
        # GH 13221
        pi1 = pd.PeriodIndex(["2011-01", "2011-02"], freq="M")
        pi2 = pd.PeriodIndex(["2012-01-01", "2012-02-01"], freq="D")

        exp = pd.Index(
            [
                pd.Period("2011-01", freq="M"),
                pd.Period("2011-02", freq="M"),
                pd.Period("2012-01-01", freq="D"),
                pd.Period("2012-02-01", freq="D"),
            ],
            dtype=object,
        )

        res = pi1.append(pi2)
        tm.assert_index_equal(res, exp)

        ps1 = pd.Series(pi1)
        ps2 = pd.Series(pi2)
        res = ps1.append(ps2)
        tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1]))

        res = pd.concat([ps1, ps2])
        tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1]))

    def test_concatlike_common_period_mixed_dt_to_object(self):
        # GH 13221
        # different datetimelike
        pi1 = pd.PeriodIndex(["2011-01", "2011-02"], freq="M")
        tdi = pd.TimedeltaIndex(["1 days", "2 days"])
        exp = pd.Index(
            [
                pd.Period("2011-01", freq="M"),
                pd.Period("2011-02", freq="M"),
                pd.Timedelta("1 days"),
                pd.Timedelta("2 days"),
            ],
            dtype=object,
        )

        res = pi1.append(tdi)
        tm.assert_index_equal(res, exp)

        ps1 = pd.Series(pi1)
        tds = pd.Series(tdi)
        res = ps1.append(tds)
        tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1]))

        res = pd.concat([ps1, tds])
        tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1]))

        # inverse
        exp = pd.Index(
            [
                pd.Timedelta("1 days"),
                pd.Timedelta("2 days"),
                pd.Period("2011-01", freq="M"),
                pd.Period("2011-02", freq="M"),
            ],
            dtype=object,
        )

        res = tdi.append(pi1)
        tm.assert_index_equal(res, exp)

        ps1 = pd.Series(pi1)
        tds = pd.Series(tdi)
        res = tds.append(ps1)
        tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1]))

        res = pd.concat([tds, ps1])
        tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1]))

    def test_concat_categorical(self):
        # GH 13524

        # same categories -> category
        s1 = pd.Series([1, 2, np.nan], dtype="category")
        s2 = pd.Series([2, 1, 2], dtype="category")

        exp = pd.Series([1, 2, np.nan, 2, 1, 2], dtype="category")
        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)

        # partially different categories => not-category
        s1 = pd.Series([3, 2], dtype="category")
        s2 = pd.Series([2, 1], dtype="category")

        exp = pd.Series([3, 2, 2, 1])
        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)

        # completely different categories (same dtype) => not-category
        s1 = pd.Series([10, 11, np.nan], dtype="category")
        s2 = pd.Series([np.nan, 1, 3, 2], dtype="category")

        exp = pd.Series([10, 11, np.nan, np.nan, 1, 3, 2], dtype="object")
        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)

    def test_union_categorical_same_categories_different_order(self):
        # https://github.com/pandas-dev/pandas/issues/19096
        a = pd.Series(Categorical(["a", "b", "c"], categories=["a", "b", "c"]))
        b = pd.Series(Categorical(["a", "b", "c"], categories=["b", "a", "c"]))
        result = pd.concat([a, b], ignore_index=True)
        expected = pd.Series(
            Categorical(["a", "b", "c", "a", "b", "c"], categories=["a", "b", "c"])
        )
        tm.assert_series_equal(result, expected)

    def test_concat_categorical_coercion(self):
        # GH 13524

        # category + not-category => not-category
        s1 = pd.Series([1, 2, np.nan], dtype="category")
        s2 = pd.Series([2, 1, 2])

        exp = pd.Series([1, 2, np.nan, 2, 1, 2], dtype="object")
        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)

        # result shouldn't be affected by 1st elem dtype
        exp = pd.Series([2, 1, 2, 1, 2, np.nan], dtype="object")
        tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
        tm.assert_series_equal(s2.append(s1, ignore_index=True), exp)

        # all values are not in category => not-category
        s1 = pd.Series([3, 2], dtype="category")
        s2 = pd.Series([2, 1])

        exp = pd.Series([3, 2, 2, 1])
        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)

        exp = pd.Series([2, 1, 3, 2])
        tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
        tm.assert_series_equal(s2.append(s1, ignore_index=True), exp)

        # completely different categories => not-category
        s1 = pd.Series([10, 11, np.nan], dtype="category")
        s2 = pd.Series([1, 3, 2])

        exp = pd.Series([10, 11, np.nan, 1, 3, 2], dtype="object")
        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)

        exp = pd.Series([1, 3, 2, 10, 11, np.nan], dtype="object")
        tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
        tm.assert_series_equal(s2.append(s1, ignore_index=True), exp)

        # different dtype => not-category
        s1 = pd.Series([10, 11, np.nan], dtype="category")
        s2 = pd.Series(["a", "b", "c"])

        exp = pd.Series([10, 11, np.nan, "a", "b", "c"])
        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)

        exp = pd.Series(["a", "b", "c", 10, 11, np.nan])
        tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
        tm.assert_series_equal(s2.append(s1, ignore_index=True), exp)

        # if normal series only contains NaN-likes => not-category
        s1 = pd.Series([10, 11], dtype="category")
        s2 = pd.Series([np.nan, np.nan, np.nan])

        exp = pd.Series([10, 11, np.nan, np.nan, np.nan])
        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)

        exp = pd.Series([np.nan, np.nan, np.nan, 10, 11])
        tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
        tm.assert_series_equal(s2.append(s1, ignore_index=True), exp)

    def test_concat_categorical_3elem_coercion(self):
        # GH 13524

        # mixed dtypes => not-category
        s1 = pd.Series([1, 2, np.nan], dtype="category")
        s2 = pd.Series([2, 1, 2], dtype="category")
        s3 = pd.Series([1, 2, 1, 2, np.nan])

        exp = pd.Series([1, 2, np.nan, 2, 1, 2, 1, 2, 1, 2, np.nan], dtype="float")
        tm.assert_series_equal(pd.concat([s1, s2, s3], ignore_index=True), exp)
        tm.assert_series_equal(s1.append([s2, s3], ignore_index=True), exp)

        exp = pd.Series([1, 2, 1, 2, np.nan, 1, 2, np.nan, 2, 1, 2], dtype="float")
        tm.assert_series_equal(pd.concat([s3, s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s3.append([s1, s2], ignore_index=True), exp)

        # values are all in either category => not-category
        s1 = pd.Series([4, 5, 6], dtype="category")
        s2 = pd.Series([1, 2, 3], dtype="category")
        s3 = pd.Series([1, 3, 4])

        exp = pd.Series([4, 5, 6, 1, 2, 3, 1, 3, 4])
        tm.assert_series_equal(pd.concat([s1, s2, s3], ignore_index=True), exp)
        tm.assert_series_equal(s1.append([s2, s3], ignore_index=True), exp)

        exp = pd.Series([1, 3, 4, 4, 5, 6, 1, 2, 3])
        tm.assert_series_equal(pd.concat([s3, s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s3.append([s1, s2], ignore_index=True), exp)

        # values are all in either category => not-category
        s1 = pd.Series([4, 5, 6], dtype="category")
        s2 = pd.Series([1, 2, 3], dtype="category")
        s3 = pd.Series([10, 11, 12])

        exp = pd.Series([4, 5, 6, 1, 2, 3, 10, 11, 12])
        tm.assert_series_equal(pd.concat([s1, s2, s3], ignore_index=True), exp)
        tm.assert_series_equal(s1.append([s2, s3], ignore_index=True), exp)

        exp = pd.Series([10, 11, 12, 4, 5, 6, 1, 2, 3])
        tm.assert_series_equal(pd.concat([s3, s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s3.append([s1, s2], ignore_index=True), exp)

    def test_concat_categorical_multi_coercion(self):
        # GH 13524

        s1 = pd.Series([1, 3], dtype="category")
        s2 = pd.Series([3, 4], dtype="category")
        s3 = pd.Series([2, 3])
        s4 = pd.Series([2, 2], dtype="category")
        s5 = pd.Series([1, np.nan])
        s6 = pd.Series([1, 3, 2], dtype="category")

        # mixed dtype, values are all in categories => not-category
        exp = pd.Series([1, 3, 3, 4, 2, 3, 2, 2, 1, np.nan, 1, 3, 2])
        res = pd.concat([s1, s2, s3, s4, s5, s6], ignore_index=True)
        tm.assert_series_equal(res, exp)
        res = s1.append([s2, s3, s4, s5, s6], ignore_index=True)
        tm.assert_series_equal(res, exp)

        exp = pd.Series([1, 3, 2, 1, np.nan, 2, 2, 2, 3, 3, 4, 1, 3])
        res = pd.concat([s6, s5, s4, s3, s2, s1], ignore_index=True)
        tm.assert_series_equal(res, exp)
        res = s6.append([s5, s4, s3, s2, s1], ignore_index=True)
        tm.assert_series_equal(res, exp)

    def test_concat_categorical_ordered(self):
        # GH 13524

        s1 = pd.Series(pd.Categorical([1, 2, np.nan], ordered=True))
        s2 = pd.Series(pd.Categorical([2, 1, 2], ordered=True))

        exp = pd.Series(pd.Categorical([1, 2, np.nan, 2, 1, 2], ordered=True))
        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)

        exp = pd.Series(
            pd.Categorical([1, 2, np.nan, 2, 1, 2, 1, 2, np.nan], ordered=True)
        )
        tm.assert_series_equal(pd.concat([s1, s2, s1], ignore_index=True), exp)
        tm.assert_series_equal(s1.append([s2, s1], ignore_index=True), exp)

    def test_concat_categorical_coercion_nan(self):
        # GH 13524

        # some edge cases
        # category + not-category => not category
        s1 = pd.Series(np.array([np.nan, np.nan], dtype=np.float64), dtype="category")
        s2 = pd.Series([np.nan, 1])

        exp = pd.Series([np.nan, np.nan, np.nan, 1])
        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)

        s1 = pd.Series([1, np.nan], dtype="category")
        s2 = pd.Series([np.nan, np.nan])

        exp = pd.Series([1, np.nan, np.nan, np.nan], dtype="float")
        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)

        # mixed dtype, all nan-likes => not-category
        s1 = pd.Series([np.nan, np.nan], dtype="category")
        s2 = pd.Series([np.nan, np.nan])

        exp = pd.Series([np.nan, np.nan, np.nan, np.nan])
        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)
        tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
        tm.assert_series_equal(s2.append(s1, ignore_index=True), exp)

        # all category nan-likes => category
        s1 = pd.Series([np.nan, np.nan], dtype="category")
        s2 = pd.Series([np.nan, np.nan], dtype="category")

        exp = pd.Series([np.nan, np.nan, np.nan, np.nan], dtype="category")

        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)

    def test_concat_categorical_empty(self):
        # GH 13524

        s1 = pd.Series([], dtype="category")
        s2 = pd.Series([1, 2], dtype="category")

        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), s2)
        tm.assert_series_equal(s1.append(s2, ignore_index=True), s2)

        tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), s2)
        tm.assert_series_equal(s2.append(s1, ignore_index=True), s2)

        s1 = pd.Series([], dtype="category")
        s2 = pd.Series([], dtype="category")

        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), s2)
        tm.assert_series_equal(s1.append(s2, ignore_index=True), s2)

        s1 = pd.Series([], dtype="category")
        s2 = pd.Series([], dtype="object")

        # different dtype => not-category
        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), s2)
        tm.assert_series_equal(s1.append(s2, ignore_index=True), s2)
        tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), s2)
        tm.assert_series_equal(s2.append(s1, ignore_index=True), s2)

        s1 = pd.Series([], dtype="category")
        s2 = pd.Series([np.nan, np.nan])

        # empty Series is ignored
        exp = pd.Series([np.nan, np.nan])
        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)

        tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
        tm.assert_series_equal(s2.append(s1, ignore_index=True), exp)


class TestAppend:
    def test_append(self, sort, float_frame):
        mixed_frame = float_frame.copy()
        mixed_frame["foo"] = "bar"

        begin_index = float_frame.index[:5]
        end_index = float_frame.index[5:]

        begin_frame = float_frame.reindex(begin_index)
        end_frame = float_frame.reindex(end_index)

        appended = begin_frame.append(end_frame)
        tm.assert_almost_equal(appended["A"], float_frame["A"])

        del end_frame["A"]
        partial_appended = begin_frame.append(end_frame, sort=sort)
        assert "A" in partial_appended

        partial_appended = end_frame.append(begin_frame, sort=sort)
        assert "A" in partial_appended

        # mixed type handling
        appended = mixed_frame[:5].append(mixed_frame[5:])
        tm.assert_frame_equal(appended, mixed_frame)

        # what to test here
        mixed_appended = mixed_frame[:5].append(float_frame[5:], sort=sort)
        mixed_appended2 = float_frame[:5].append(mixed_frame[5:], sort=sort)

        # all equal except 'foo' column
        tm.assert_frame_equal(
            mixed_appended.reindex(columns=["A", "B", "C", "D"]),
            mixed_appended2.reindex(columns=["A", "B", "C", "D"]),
        )

    def test_append_empty(self, float_frame):
        empty = DataFrame()

        appended = float_frame.append(empty)
        tm.assert_frame_equal(float_frame, appended)
        assert appended is not float_frame

        appended = empty.append(float_frame)
        tm.assert_frame_equal(float_frame, appended)
        assert appended is not float_frame

    def test_append_overlap_raises(self, float_frame):
        msg = "Indexes have overlapping values"
        with pytest.raises(ValueError, match=msg):
            float_frame.append(float_frame, verify_integrity=True)

    def test_append_new_columns(self):
        # see gh-6129: new columns
        df = DataFrame({"a": {"x": 1, "y": 2}, "b": {"x": 3, "y": 4}})
        row = Series([5, 6, 7], index=["a", "b", "c"], name="z")
        expected = DataFrame(
            {
                "a": {"x": 1, "y": 2, "z": 5},
                "b": {"x": 3, "y": 4, "z": 6},
                "c": {"z": 7},
            }
        )
        result = df.append(row)
        tm.assert_frame_equal(result, expected)

    def test_append_length0_frame(self, sort):
        df = DataFrame(columns=["A", "B", "C"])
        df3 = DataFrame(index=[0, 1], columns=["A", "B"])
        df5 = df.append(df3, sort=sort)

        expected = DataFrame(index=[0, 1], columns=["A", "B", "C"])
        tm.assert_frame_equal(df5, expected)

    def test_append_records(self):
        arr1 = np.zeros((2,), dtype=("i4,f4,a10"))
        arr1[:] = [(1, 2.0, "Hello"), (2, 3.0, "World")]

        arr2 = np.zeros((3,), dtype=("i4,f4,a10"))
        arr2[:] = [(3, 4.0, "foo"), (5, 6.0, "bar"), (7.0, 8.0, "baz")]

        df1 = DataFrame(arr1)
        df2 = DataFrame(arr2)

        result = df1.append(df2, ignore_index=True)
        expected = DataFrame(np.concatenate((arr1, arr2)))
        tm.assert_frame_equal(result, expected)

    # rewrite sort fixture, since we also want to test default of None
    def test_append_sorts(self, sort):
        df1 = pd.DataFrame({"a": [1, 2], "b": [1, 2]}, columns=["b", "a"])
        df2 = pd.DataFrame({"a": [1, 2], "c": [3, 4]}, index=[2, 3])

        with tm.assert_produces_warning(None):
            result = df1.append(df2, sort=sort)

        # for None / True
        expected = pd.DataFrame(
            {"b": [1, 2, None, None], "a": [1, 2, 1, 2], "c": [None, None, 3, 4]},
            columns=["a", "b", "c"],
        )
        if sort is False:
            expected = expected[["b", "a", "c"]]
        tm.assert_frame_equal(result, expected)

    def test_append_different_columns(self, sort):
        df = DataFrame(
            {
                "bools": np.random.randn(10) > 0,
                "ints": np.random.randint(0, 10, 10),
                "floats": np.random.randn(10),
                "strings": ["foo", "bar"] * 5,
            }
        )

        a = df[:5].loc[:, ["bools", "ints", "floats"]]
        b = df[5:].loc[:, ["strings", "ints", "floats"]]

        appended = a.append(b, sort=sort)
        assert isna(appended["strings"][0:4]).all()
        assert isna(appended["bools"][5:]).all()

    def test_append_many(self, sort, float_frame):
        chunks = [
            float_frame[:5],
            float_frame[5:10],
            float_frame[10:15],
            float_frame[15:],
        ]

        result = chunks[0].append(chunks[1:])
        tm.assert_frame_equal(result, float_frame)

        chunks[-1] = chunks[-1].copy()
        chunks[-1]["foo"] = "bar"
        result = chunks[0].append(chunks[1:], sort=sort)
        tm.assert_frame_equal(result.loc[:, float_frame.columns], float_frame)
        assert (result["foo"][15:] == "bar").all()
        assert result["foo"][:15].isna().all()

    def test_append_preserve_index_name(self):
        # #980
        df1 = DataFrame(columns=["A", "B", "C"])
        df1 = df1.set_index(["A"])
        df2 = DataFrame(data=[[1, 4, 7], [2, 5, 8], [3, 6, 9]], columns=["A", "B", "C"])
        df2 = df2.set_index(["A"])

        result = df1.append(df2)
        assert result.index.name == "A"

    indexes_can_append = [
        pd.RangeIndex(3),
        pd.Index([4, 5, 6]),
        pd.Index([4.5, 5.5, 6.5]),
        pd.Index(list("abc")),
        pd.CategoricalIndex("A B C".split()),
        pd.CategoricalIndex("D E F".split(), ordered=True),
        pd.IntervalIndex.from_breaks([7, 8, 9, 10]),
        pd.DatetimeIndex(
            [
                dt.datetime(2013, 1, 3, 0, 0),
                dt.datetime(2013, 1, 3, 6, 10),
                dt.datetime(2013, 1, 3, 7, 12),
            ]
        ),
    ]

    indexes_cannot_append_with_other = [
        pd.MultiIndex.from_arrays(["A B C".split(), "D E F".split()])
    ]

    all_indexes = indexes_can_append + indexes_cannot_append_with_other

    @pytest.mark.parametrize("index", all_indexes, ids=lambda x: type(x).__name__)
    def test_append_same_columns_type(self, index):
        # GH18359

        # df wider than ser
        df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=index)
        ser_index = index[:2]
        ser = pd.Series([7, 8], index=ser_index, name=2)
        result = df.append(ser)
        expected = pd.DataFrame(
            [[1.0, 2.0, 3.0], [4, 5, 6], [7, 8, np.nan]], index=[0, 1, 2], columns=index
        )
        tm.assert_frame_equal(result, expected)

        # ser wider than df
        ser_index = index
        index = index[:2]
        df = pd.DataFrame([[1, 2], [4, 5]], columns=index)
        ser = pd.Series([7, 8, 9], index=ser_index, name=2)
        result = df.append(ser)
        expected = pd.DataFrame(
            [[1, 2, np.nan], [4, 5, np.nan], [7, 8, 9]],
            index=[0, 1, 2],
            columns=ser_index,
        )
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize(
        "df_columns, series_index",
        combinations(indexes_can_append, r=2),
        ids=lambda x: type(x).__name__,
    )
    def test_append_different_columns_types(self, df_columns, series_index):
        # GH18359
        # See also test 'test_append_different_columns_types_raises' below
        # for errors raised when appending

        df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=df_columns)
        ser = pd.Series([7, 8, 9], index=series_index, name=2)

        result = df.append(ser)
        idx_diff = ser.index.difference(df_columns)
        combined_columns = Index(df_columns.tolist()).append(idx_diff)
        expected = pd.DataFrame(
            [
                [1.0, 2.0, 3.0, np.nan, np.nan, np.nan],
                [4, 5, 6, np.nan, np.nan, np.nan],
                [np.nan, np.nan, np.nan, 7, 8, 9],
            ],
            index=[0, 1, 2],
            columns=combined_columns,
        )
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize(
        "index_can_append", indexes_can_append, ids=lambda x: type(x).__name__
    )
    @pytest.mark.parametrize(
        "index_cannot_append_with_other",
        indexes_cannot_append_with_other,
        ids=lambda x: type(x).__name__,
    )
    def test_append_different_columns_types_raises(
        self, index_can_append, index_cannot_append_with_other
    ):
        # GH18359
        # Dataframe.append will raise if MultiIndex appends
        # or is appended to a different index type
        #
        # See also test 'test_append_different_columns_types' above for
        # appending without raising.

        df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=index_can_append)
        ser = pd.Series([7, 8, 9], index=index_cannot_append_with_other, name=2)
        msg = (
            r"Expected tuple, got (int|long|float|str|"
            r"pandas._libs.interval.Interval)|"
            r"object of type '(int|float|Timestamp|"
            r"pandas._libs.interval.Interval)' has no len\(\)|"
        )
        with pytest.raises(TypeError, match=msg):
            df.append(ser)

        df = pd.DataFrame(
            [[1, 2, 3], [4, 5, 6]], columns=index_cannot_append_with_other
        )
        ser = pd.Series([7, 8, 9], index=index_can_append, name=2)

        with pytest.raises(TypeError, match=msg):
            df.append(ser)

    def test_append_dtype_coerce(self, sort):

        # GH 4993
        # appending with datetime will incorrectly convert datetime64

        df1 = DataFrame(
            index=[1, 2],
            data=[dt.datetime(2013, 1, 1, 0, 0), dt.datetime(2013, 1, 2, 0, 0)],
            columns=["start_time"],
        )
        df2 = DataFrame(
            index=[4, 5],
            data=[
                [dt.datetime(2013, 1, 3, 0, 0), dt.datetime(2013, 1, 3, 6, 10)],
                [dt.datetime(2013, 1, 4, 0, 0), dt.datetime(2013, 1, 4, 7, 10)],
            ],
            columns=["start_time", "end_time"],
        )

        expected = concat(
            [
                Series(
                    [
                        pd.NaT,
                        pd.NaT,
                        dt.datetime(2013, 1, 3, 6, 10),
                        dt.datetime(2013, 1, 4, 7, 10),
                    ],
                    name="end_time",
                ),
                Series(
                    [
                        dt.datetime(2013, 1, 1, 0, 0),
                        dt.datetime(2013, 1, 2, 0, 0),
                        dt.datetime(2013, 1, 3, 0, 0),
                        dt.datetime(2013, 1, 4, 0, 0),
                    ],
                    name="start_time",
                ),
            ],
            axis=1,
            sort=sort,
        )
        result = df1.append(df2, ignore_index=True, sort=sort)
        if sort:
            expected = expected[["end_time", "start_time"]]
        else:
            expected = expected[["start_time", "end_time"]]

        tm.assert_frame_equal(result, expected)

    def test_append_missing_column_proper_upcast(self, sort):
        df1 = DataFrame({"A": np.array([1, 2, 3, 4], dtype="i8")})
        df2 = DataFrame({"B": np.array([True, False, True, False], dtype=bool)})

        appended = df1.append(df2, ignore_index=True, sort=sort)
        assert appended["A"].dtype == "f8"
        assert appended["B"].dtype == "O"

    def test_append_empty_frame_to_series_with_dateutil_tz(self):
        # GH 23682
        date = Timestamp("2018-10-24 07:30:00", tz=dateutil.tz.tzutc())
        s = Series({"date": date, "a": 1.0, "b": 2.0})
        df = DataFrame(columns=["c", "d"])
        result_a = df.append(s, ignore_index=True)
        expected = DataFrame(
            [[np.nan, np.nan, 1.0, 2.0, date]], columns=["c", "d", "a", "b", "date"]
        )
        # These columns get cast to object after append
        expected["c"] = expected["c"].astype(object)
        expected["d"] = expected["d"].astype(object)
        tm.assert_frame_equal(result_a, expected)

        expected = DataFrame(
            [[np.nan, np.nan, 1.0, 2.0, date]] * 2, columns=["c", "d", "a", "b", "date"]
        )
        expected["c"] = expected["c"].astype(object)
        expected["d"] = expected["d"].astype(object)

        result_b = result_a.append(s, ignore_index=True)
        tm.assert_frame_equal(result_b, expected)

        # column order is different
        expected = expected[["c", "d", "date", "a", "b"]]
        result = df.append([s, s], ignore_index=True)
        tm.assert_frame_equal(result, expected)


class TestConcatenate:
    def test_concat_copy(self):
        df = DataFrame(np.random.randn(4, 3))
        df2 = DataFrame(np.random.randint(0, 10, size=4).reshape(4, 1))
        df3 = DataFrame({5: "foo"}, index=range(4))

        # These are actual copies.
        result = concat([df, df2, df3], axis=1, copy=True)

        for b in result._mgr.blocks:
            assert b.values.base is None

        # These are the same.
        result = concat([df, df2, df3], axis=1, copy=False)

        for b in result._mgr.blocks:
            if b.is_float:
                assert b.values.base is df._mgr.blocks[0].values.base
            elif b.is_integer:
                assert b.values.base is df2._mgr.blocks[0].values.base
            elif b.is_object:
                assert b.values.base is not None

        # Float block was consolidated.
        df4 = DataFrame(np.random.randn(4, 1))
        result = concat([df, df2, df3, df4], axis=1, copy=False)
        for b in result._mgr.blocks:
            if b.is_float:
                assert b.values.base is None
            elif b.is_integer:
                assert b.values.base is df2._mgr.blocks[0].values.base
            elif b.is_object:
                assert b.values.base is not None

    def test_concat_with_group_keys(self):
        df = DataFrame(np.random.randn(4, 3))
        df2 = DataFrame(np.random.randn(4, 4))

        # axis=0
        df = DataFrame(np.random.randn(3, 4))
        df2 = DataFrame(np.random.randn(4, 4))

        result = concat([df, df2], keys=[0, 1])
        exp_index = MultiIndex.from_arrays(
            [[0, 0, 0, 1, 1, 1, 1], [0, 1, 2, 0, 1, 2, 3]]
        )
        expected = DataFrame(np.r_[df.values, df2.values], index=exp_index)
        tm.assert_frame_equal(result, expected)

        result = concat([df, df], keys=[0, 1])
        exp_index2 = MultiIndex.from_arrays([[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]])
        expected = DataFrame(np.r_[df.values, df.values], index=exp_index2)
        tm.assert_frame_equal(result, expected)

        # axis=1
        df = DataFrame(np.random.randn(4, 3))
        df2 = DataFrame(np.random.randn(4, 4))

        result = concat([df, df2], keys=[0, 1], axis=1)
        expected = DataFrame(np.c_[df.values, df2.values], columns=exp_index)
        tm.assert_frame_equal(result, expected)

        result = concat([df, df], keys=[0, 1], axis=1)
        expected = DataFrame(np.c_[df.values, df.values], columns=exp_index2)
        tm.assert_frame_equal(result, expected)

    def test_concat_keys_specific_levels(self):
        df = DataFrame(np.random.randn(10, 4))
        pieces = [df.iloc[:, [0, 1]], df.iloc[:, [2]], df.iloc[:, [3]]]
        level = ["three", "two", "one", "zero"]
        result = concat(
            pieces,
            axis=1,
            keys=["one", "two", "three"],
            levels=[level],
            names=["group_key"],
        )

        tm.assert_index_equal(result.columns.levels[0], Index(level, name="group_key"))
        tm.assert_index_equal(result.columns.levels[1], Index([0, 1, 2, 3]))

        assert result.columns.names == ["group_key", None]

    def test_concat_dataframe_keys_bug(self, sort):
        t1 = DataFrame(
            {"value": Series([1, 2, 3], index=Index(["a", "b", "c"], name="id"))}
        )
        t2 = DataFrame({"value": Series([7, 8], index=Index(["a", "b"], name="id"))})

        # it works
        result = concat([t1, t2], axis=1, keys=["t1", "t2"], sort=sort)
        assert list(result.columns) == [("t1", "value"), ("t2", "value")]

    def test_concat_series_partial_columns_names(self):
        # GH10698
        foo = Series([1, 2], name="foo")
        bar = Series([1, 2])
        baz = Series([4, 5])

        result = concat([foo, bar, baz], axis=1)
        expected = DataFrame(
            {"foo": [1, 2], 0: [1, 2], 1: [4, 5]}, columns=["foo", 0, 1]
        )
        tm.assert_frame_equal(result, expected)

        result = concat([foo, bar, baz], axis=1, keys=["red", "blue", "yellow"])
        expected = DataFrame(
            {"red": [1, 2], "blue": [1, 2], "yellow": [4, 5]},
            columns=["red", "blue", "yellow"],
        )
        tm.assert_frame_equal(result, expected)

        result = concat([foo, bar, baz], axis=1, ignore_index=True)
        expected = DataFrame({0: [1, 2], 1: [1, 2], 2: [4, 5]})
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize("mapping", ["mapping", "dict"])
    def test_concat_mapping(self, mapping, non_dict_mapping_subclass):
        constructor = dict if mapping == "dict" else non_dict_mapping_subclass
        frames = constructor(
            {
                "foo": DataFrame(np.random.randn(4, 3)),
                "bar": DataFrame(np.random.randn(4, 3)),
                "baz": DataFrame(np.random.randn(4, 3)),
                "qux": DataFrame(np.random.randn(4, 3)),
            }
        )

        sorted_keys = list(frames.keys())

        result = concat(frames)
        expected = concat([frames[k] for k in sorted_keys], keys=sorted_keys)
        tm.assert_frame_equal(result, expected)

        result = concat(frames, axis=1)
        expected = concat([frames[k] for k in sorted_keys], keys=sorted_keys, axis=1)
        tm.assert_frame_equal(result, expected)

        keys = ["baz", "foo", "bar"]
        result = concat(frames, keys=keys)
        expected = concat([frames[k] for k in keys], keys=keys)
        tm.assert_frame_equal(result, expected)

    def test_concat_ignore_index(self, sort):
        frame1 = DataFrame(
            {"test1": ["a", "b", "c"], "test2": [1, 2, 3], "test3": [4.5, 3.2, 1.2]}
        )
        frame2 = DataFrame({"test3": [5.2, 2.2, 4.3]})
        frame1.index = Index(["x", "y", "z"])
        frame2.index = Index(["x", "y", "q"])

        v1 = concat([frame1, frame2], axis=1, ignore_index=True, sort=sort)

        nan = np.nan
        expected = DataFrame(
            [
                [nan, nan, nan, 4.3],
                ["a", 1, 4.5, 5.2],
                ["b", 2, 3.2, 2.2],
                ["c", 3, 1.2, nan],
            ],
            index=Index(["q", "x", "y", "z"]),
        )
        if not sort:
            expected = expected.loc[["x", "y", "z", "q"]]

        tm.assert_frame_equal(v1, expected)

    def test_concat_multiindex_with_keys(self):
        index = MultiIndex(
            levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]],
            codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
            names=["first", "second"],
        )
        frame = DataFrame(
            np.random.randn(10, 3),
            index=index,
            columns=Index(["A", "B", "C"], name="exp"),
        )
        result = concat([frame, frame], keys=[0, 1], names=["iteration"])

        assert result.index.names == ("iteration",) + index.names
        tm.assert_frame_equal(result.loc[0], frame)
        tm.assert_frame_equal(result.loc[1], frame)
        assert result.index.nlevels == 3

    def test_concat_multiindex_with_tz(self):
        # GH 6606
        df = DataFrame(
            {
                "dt": [
                    datetime(2014, 1, 1),
                    datetime(2014, 1, 2),
                    datetime(2014, 1, 3),
                ],
                "b": ["A", "B", "C"],
                "c": [1, 2, 3],
                "d": [4, 5, 6],
            }
        )
        df["dt"] = df["dt"].apply(lambda d: Timestamp(d, tz="US/Pacific"))
        df = df.set_index(["dt", "b"])

        exp_idx1 = DatetimeIndex(
            ["2014-01-01", "2014-01-02", "2014-01-03"] * 2, tz="US/Pacific", name="dt"
        )
        exp_idx2 = Index(["A", "B", "C"] * 2, name="b")
        exp_idx = MultiIndex.from_arrays([exp_idx1, exp_idx2])
        expected = DataFrame(
            {"c": [1, 2, 3] * 2, "d": [4, 5, 6] * 2}, index=exp_idx, columns=["c", "d"]
        )

        result = concat([df, df])
        tm.assert_frame_equal(result, expected)

    def test_concat_multiindex_with_none_in_index_names(self):
        # GH 15787
        index = pd.MultiIndex.from_product([[1], range(5)], names=["level1", None])
        df = pd.DataFrame({"col": range(5)}, index=index, dtype=np.int32)

        result = concat([df, df], keys=[1, 2], names=["level2"])
        index = pd.MultiIndex.from_product(
            [[1, 2], [1], range(5)], names=["level2", "level1", None]
        )
        expected = pd.DataFrame(
            {"col": list(range(5)) * 2}, index=index, dtype=np.int32
        )
        tm.assert_frame_equal(result, expected)

        result = concat([df, df[:2]], keys=[1, 2], names=["level2"])
        level2 = [1] * 5 + [2] * 2
        level1 = [1] * 7
        no_name = list(range(5)) + list(range(2))
        tuples = list(zip(level2, level1, no_name))
        index = pd.MultiIndex.from_tuples(tuples, names=["level2", "level1", None])
        expected = pd.DataFrame({"col": no_name}, index=index, dtype=np.int32)
        tm.assert_frame_equal(result, expected)

    def test_concat_keys_and_levels(self):
        df = DataFrame(np.random.randn(1, 3))
        df2 = DataFrame(np.random.randn(1, 4))

        levels = [["foo", "baz"], ["one", "two"]]
        names = ["first", "second"]
        result = concat(
            [df, df2, df, df2],
            keys=[("foo", "one"), ("foo", "two"), ("baz", "one"), ("baz", "two")],
            levels=levels,
            names=names,
        )
        expected = concat([df, df2, df, df2])
        exp_index = MultiIndex(
            levels=levels + [[0]],
            codes=[[0, 0, 1, 1], [0, 1, 0, 1], [0, 0, 0, 0]],
            names=names + [None],
        )
        expected.index = exp_index

        tm.assert_frame_equal(result, expected)

        # no names
        result = concat(
            [df, df2, df, df2],
            keys=[("foo", "one"), ("foo", "two"), ("baz", "one"), ("baz", "two")],
            levels=levels,
        )
        assert result.index.names == (None,) * 3

        # no levels
        result = concat(
            [df, df2, df, df2],
            keys=[("foo", "one"), ("foo", "two"), ("baz", "one"), ("baz", "two")],
            names=["first", "second"],
        )
        assert result.index.names == ("first", "second", None)
        tm.assert_index_equal(
            result.index.levels[0], Index(["baz", "foo"], name="first")
        )

    def test_concat_keys_levels_no_overlap(self):
        # GH #1406
        df = DataFrame(np.random.randn(1, 3), index=["a"])
        df2 = DataFrame(np.random.randn(1, 4), index=["b"])

        msg = "Values not found in passed level"
        with pytest.raises(ValueError, match=msg):
            concat([df, df], keys=["one", "two"], levels=[["foo", "bar", "baz"]])

        msg = "Key one not in level"
        with pytest.raises(ValueError, match=msg):
            concat([df, df2], keys=["one", "two"], levels=[["foo", "bar", "baz"]])

    def test_concat_rename_index(self):
        a = DataFrame(
            np.random.rand(3, 3),
            columns=list("ABC"),
            index=Index(list("abc"), name="index_a"),
        )
        b = DataFrame(
            np.random.rand(3, 3),
            columns=list("ABC"),
            index=Index(list("abc"), name="index_b"),
        )

        result = concat([a, b], keys=["key0", "key1"], names=["lvl0", "lvl1"])

        exp = concat([a, b], keys=["key0", "key1"], names=["lvl0"])
        names = list(exp.index.names)
        names[1] = "lvl1"
        exp.index.set_names(names, inplace=True)

        tm.assert_frame_equal(result, exp)
        assert result.index.names == exp.index.names

    def test_crossed_dtypes_weird_corner(self):
        columns = ["A", "B", "C", "D"]
        df1 = DataFrame(
            {
                "A": np.array([1, 2, 3, 4], dtype="f8"),
                "B": np.array([1, 2, 3, 4], dtype="i8"),
                "C": np.array([1, 2, 3, 4], dtype="f8"),
                "D": np.array([1, 2, 3, 4], dtype="i8"),
            },
            columns=columns,
        )

        df2 = DataFrame(
            {
                "A": np.array([1, 2, 3, 4], dtype="i8"),
                "B": np.array([1, 2, 3, 4], dtype="f8"),
                "C": np.array([1, 2, 3, 4], dtype="i8"),
                "D": np.array([1, 2, 3, 4], dtype="f8"),
            },
            columns=columns,
        )

        appended = df1.append(df2, ignore_index=True)
        expected = DataFrame(
            np.concatenate([df1.values, df2.values], axis=0), columns=columns
        )
        tm.assert_frame_equal(appended, expected)

        df = DataFrame(np.random.randn(1, 3), index=["a"])
        df2 = DataFrame(np.random.randn(1, 4), index=["b"])
        result = concat([df, df2], keys=["one", "two"], names=["first", "second"])
        assert result.index.names == ("first", "second")

    def test_dups_index(self):
        # GH 4771

        # single dtypes
        df = DataFrame(
            np.random.randint(0, 10, size=40).reshape(10, 4),
            columns=["A", "A", "C", "C"],
        )

        result = concat([df, df], axis=1)
        tm.assert_frame_equal(result.iloc[:, :4], df)
        tm.assert_frame_equal(result.iloc[:, 4:], df)

        result = concat([df, df], axis=0)
        tm.assert_frame_equal(result.iloc[:10], df)
        tm.assert_frame_equal(result.iloc[10:], df)

        # multi dtypes
        df = concat(
            [
                DataFrame(np.random.randn(10, 4), columns=["A", "A", "B", "B"]),
                DataFrame(
                    np.random.randint(0, 10, size=20).reshape(10, 2), columns=["A", "C"]
                ),
            ],
            axis=1,
        )

        result = concat([df, df], axis=1)
        tm.assert_frame_equal(result.iloc[:, :6], df)
        tm.assert_frame_equal(result.iloc[:, 6:], df)

        result = concat([df, df], axis=0)
        tm.assert_frame_equal(result.iloc[:10], df)
        tm.assert_frame_equal(result.iloc[10:], df)

        # append
        result = df.iloc[0:8, :].append(df.iloc[8:])
        tm.assert_frame_equal(result, df)

        result = df.iloc[0:8, :].append(df.iloc[8:9]).append(df.iloc[9:10])
        tm.assert_frame_equal(result, df)

        expected = concat([df, df], axis=0)
        result = df.append(df)
        tm.assert_frame_equal(result, expected)

    def test_with_mixed_tuples(self, sort):
        # 10697
        # columns have mixed tuples, so handle properly
        df1 = DataFrame({"A": "foo", ("B", 1): "bar"}, index=range(2))
        df2 = DataFrame({"B": "foo", ("B", 1): "bar"}, index=range(2))

        # it works
        concat([df1, df2], sort=sort)

    def test_handle_empty_objects(self, sort):
        df = DataFrame(np.random.randn(10, 4), columns=list("abcd"))

        baz = df[:5].copy()
        baz["foo"] = "bar"
        empty = df[5:5]

        frames = [baz, empty, empty, df[5:]]
        concatted = concat(frames, axis=0, sort=sort)

        expected = df.reindex(columns=["a", "b", "c", "d", "foo"])
        expected["foo"] = expected["foo"].astype("O")
        expected.loc[0:4, "foo"] = "bar"

        tm.assert_frame_equal(concatted, expected)

        # empty as first element with time series
        # GH3259
        df = DataFrame(
            dict(A=range(10000)), index=date_range("20130101", periods=10000, freq="s")
        )
        empty = DataFrame()
        result = concat([df, empty], axis=1)
        tm.assert_frame_equal(result, df)
        result = concat([empty, df], axis=1)
        tm.assert_frame_equal(result, df)

        result = concat([df, empty])
        tm.assert_frame_equal(result, df)
        result = concat([empty, df])
        tm.assert_frame_equal(result, df)

    def test_concat_mixed_objs(self):

        # concat mixed series/frames
        # G2385

        # axis 1
        index = date_range("01-Jan-2013", periods=10, freq="H")
        arr = np.arange(10, dtype="int64")
        s1 = Series(arr, index=index)
        s2 = Series(arr, index=index)
        df = DataFrame(arr.reshape(-1, 1), index=index)

        expected = DataFrame(
            np.repeat(arr, 2).reshape(-1, 2), index=index, columns=[0, 0]
        )
        result = concat([df, df], axis=1)
        tm.assert_frame_equal(result, expected)

        expected = DataFrame(
            np.repeat(arr, 2).reshape(-1, 2), index=index, columns=[0, 1]
        )
        result = concat([s1, s2], axis=1)
        tm.assert_frame_equal(result, expected)

        expected = DataFrame(
            np.repeat(arr, 3).reshape(-1, 3), index=index, columns=[0, 1, 2]
        )
        result = concat([s1, s2, s1], axis=1)
        tm.assert_frame_equal(result, expected)

        expected = DataFrame(
            np.repeat(arr, 5).reshape(-1, 5), index=index, columns=[0, 0, 1, 2, 3]
        )
        result = concat([s1, df, s2, s2, s1], axis=1)
        tm.assert_frame_equal(result, expected)

        # with names
        s1.name = "foo"
        expected = DataFrame(
            np.repeat(arr, 3).reshape(-1, 3), index=index, columns=["foo", 0, 0]
        )
        result = concat([s1, df, s2], axis=1)
        tm.assert_frame_equal(result, expected)

        s2.name = "bar"
        expected = DataFrame(
            np.repeat(arr, 3).reshape(-1, 3), index=index, columns=["foo", 0, "bar"]
        )
        result = concat([s1, df, s2], axis=1)
        tm.assert_frame_equal(result, expected)

        # ignore index
        expected = DataFrame(
            np.repeat(arr, 3).reshape(-1, 3), index=index, columns=[0, 1, 2]
        )
        result = concat([s1, df, s2], axis=1, ignore_index=True)
        tm.assert_frame_equal(result, expected)

        # axis 0
        expected = DataFrame(
            np.tile(arr, 3).reshape(-1, 1), index=index.tolist() * 3, columns=[0]
        )
        result = concat([s1, df, s2])
        tm.assert_frame_equal(result, expected)

        expected = DataFrame(np.tile(arr, 3).reshape(-1, 1), columns=[0])
        result = concat([s1, df, s2], ignore_index=True)
        tm.assert_frame_equal(result, expected)

    def test_empty_dtype_coerce(self):

        # xref to #12411
        # xref to #12045
        # xref to #11594
        # see below

        # 10571
        df1 = DataFrame(data=[[1, None], [2, None]], columns=["a", "b"])
        df2 = DataFrame(data=[[3, None], [4, None]], columns=["a", "b"])
        result = concat([df1, df2])
        expected = df1.dtypes
        tm.assert_series_equal(result.dtypes, expected)

    def test_dtype_coerceion(self):

        # 12411
        df = DataFrame({"date": [pd.Timestamp("20130101").tz_localize("UTC"), pd.NaT]})

        result = concat([df.iloc[[0]], df.iloc[[1]]])
        tm.assert_series_equal(result.dtypes, df.dtypes)

        # 12045
        import datetime

        df = DataFrame(
            {"date": [datetime.datetime(2012, 1, 1), datetime.datetime(1012, 1, 2)]}
        )
        result = concat([df.iloc[[0]], df.iloc[[1]]])
        tm.assert_series_equal(result.dtypes, df.dtypes)

        # 11594
        df = DataFrame({"text": ["some words"] + [None] * 9})
        result = concat([df.iloc[[0]], df.iloc[[1]]])
        tm.assert_series_equal(result.dtypes, df.dtypes)

    def test_concat_series(self):

        ts = tm.makeTimeSeries()
        ts.name = "foo"

        pieces = [ts[:5], ts[5:15], ts[15:]]

        result = concat(pieces)
        tm.assert_series_equal(result, ts)
        assert result.name == ts.name

        result = concat(pieces, keys=[0, 1, 2])
        expected = ts.copy()

        ts.index = DatetimeIndex(np.array(ts.index.values, dtype="M8[ns]"))

        exp_codes = [np.repeat([0, 1, 2], [len(x) for x in pieces]), np.arange(len(ts))]
        exp_index = MultiIndex(levels=[[0, 1, 2], ts.index], codes=exp_codes)
        expected.index = exp_index
        tm.assert_series_equal(result, expected)

    def test_concat_series_axis1(self, sort=sort):
        ts = tm.makeTimeSeries()

        pieces = [ts[:-2], ts[2:], ts[2:-2]]

        result = concat(pieces, axis=1)
        expected = DataFrame(pieces).T
        tm.assert_frame_equal(result, expected)

        result = concat(pieces, keys=["A", "B", "C"], axis=1)
        expected = DataFrame(pieces, index=["A", "B", "C"]).T
        tm.assert_frame_equal(result, expected)

        # preserve series names, #2489
        s = Series(randn(5), name="A")
        s2 = Series(randn(5), name="B")

        result = concat([s, s2], axis=1)
        expected = DataFrame({"A": s, "B": s2})
        tm.assert_frame_equal(result, expected)

        s2.name = None
        result = concat([s, s2], axis=1)
        tm.assert_index_equal(result.columns, Index(["A", 0], dtype="object"))

        # must reindex, #2603
        s = Series(randn(3), index=["c", "a", "b"], name="A")
        s2 = Series(randn(4), index=["d", "a", "b", "c"], name="B")
        result = concat([s, s2], axis=1, sort=sort)
        expected = DataFrame({"A": s, "B": s2})
        tm.assert_frame_equal(result, expected)

    def test_concat_series_axis1_names_applied(self):
        # ensure names argument is not ignored on axis=1, #23490
        s = Series([1, 2, 3])
        s2 = Series([4, 5, 6])
        result = concat([s, s2], axis=1, keys=["a", "b"], names=["A"])
        expected = DataFrame(
            [[1, 4], [2, 5], [3, 6]], columns=pd.Index(["a", "b"], name="A")
        )
        tm.assert_frame_equal(result, expected)

        result = concat([s, s2], axis=1, keys=[("a", 1), ("b", 2)], names=["A", "B"])
        expected = DataFrame(
            [[1, 4], [2, 5], [3, 6]],
            columns=MultiIndex.from_tuples([("a", 1), ("b", 2)], names=["A", "B"]),
        )
        tm.assert_frame_equal(result, expected)

    def test_concat_single_with_key(self):
        df = DataFrame(np.random.randn(10, 4))

        result = concat([df], keys=["foo"])
        expected = concat([df, df], keys=["foo", "bar"])
        tm.assert_frame_equal(result, expected[:10])

    def test_concat_exclude_none(self):
        df = DataFrame(np.random.randn(10, 4))

        pieces = [df[:5], None, None, df[5:]]
        result = concat(pieces)
        tm.assert_frame_equal(result, df)
        with pytest.raises(ValueError, match="All objects passed were None"):
            concat([None, None])

    def test_concat_datetime64_block(self):
        from pandas.core.indexes.datetimes import date_range

        rng = date_range("1/1/2000", periods=10)

        df = DataFrame({"time": rng})

        result = concat([df, df])
        assert (result.iloc[:10]["time"] == rng).all()
        assert (result.iloc[10:]["time"] == rng).all()

    def test_concat_timedelta64_block(self):
        from pandas import to_timedelta

        rng = to_timedelta(np.arange(10), unit="s")

        df = DataFrame({"time": rng})

        result = concat([df, df])
        assert (result.iloc[:10]["time"] == rng).all()
        assert (result.iloc[10:]["time"] == rng).all()

    def test_concat_keys_with_none(self):
        # #1649
        df0 = DataFrame([[10, 20, 30], [10, 20, 30], [10, 20, 30]])

        result = concat(dict(a=None, b=df0, c=df0[:2], d=df0[:1], e=df0))
        expected = concat(dict(b=df0, c=df0[:2], d=df0[:1], e=df0))
        tm.assert_frame_equal(result, expected)

        result = concat(
            [None, df0, df0[:2], df0[:1], df0], keys=["a", "b", "c", "d", "e"]
        )
        expected = concat([df0, df0[:2], df0[:1], df0], keys=["b", "c", "d", "e"])
        tm.assert_frame_equal(result, expected)

    def test_concat_bug_1719(self):
        ts1 = tm.makeTimeSeries()
        ts2 = tm.makeTimeSeries()[::2]

        # to join with union
        # these two are of different length!
        left = concat([ts1, ts2], join="outer", axis=1)
        right = concat([ts2, ts1], join="outer", axis=1)

        assert len(left) == len(right)

    def test_concat_bug_2972(self):
        ts0 = Series(np.zeros(5))
        ts1 = Series(np.ones(5))
        ts0.name = ts1.name = "same name"
        result = concat([ts0, ts1], axis=1)

        expected = DataFrame({0: ts0, 1: ts1})
        expected.columns = ["same name", "same name"]
        tm.assert_frame_equal(result, expected)

    def test_concat_bug_3602(self):

        # GH 3602, duplicate columns
        df1 = DataFrame(
            {
                "firmNo": [0, 0, 0, 0],
                "prc": [6, 6, 6, 6],
                "stringvar": ["rrr", "rrr", "rrr", "rrr"],
            }
        )
        df2 = DataFrame(
            {"C": [9, 10, 11, 12], "misc": [1, 2, 3, 4], "prc": [6, 6, 6, 6]}
        )
        expected = DataFrame(
            [
                [0, 6, "rrr", 9, 1, 6],
                [0, 6, "rrr", 10, 2, 6],
                [0, 6, "rrr", 11, 3, 6],
                [0, 6, "rrr", 12, 4, 6],
            ]
        )
        expected.columns = ["firmNo", "prc", "stringvar", "C", "misc", "prc"]

        result = concat([df1, df2], axis=1)
        tm.assert_frame_equal(result, expected)

    def test_concat_inner_join_empty(self):
        # GH 15328
        df_empty = pd.DataFrame()
        df_a = pd.DataFrame({"a": [1, 2]}, index=[0, 1], dtype="int64")
        df_expected = pd.DataFrame({"a": []}, index=[], dtype="int64")

        for how, expected in [("inner", df_expected), ("outer", df_a)]:
            result = pd.concat([df_a, df_empty], axis=1, join=how)
            tm.assert_frame_equal(result, expected)

    def test_concat_series_axis1_same_names_ignore_index(self):
        dates = date_range("01-Jan-2013", "01-Jan-2014", freq="MS")[0:-1]
        s1 = Series(randn(len(dates)), index=dates, name="value")
        s2 = Series(randn(len(dates)), index=dates, name="value")

        result = concat([s1, s2], axis=1, ignore_index=True)
        expected = Index([0, 1])

        tm.assert_index_equal(result.columns, expected)

    def test_concat_iterables(self):
        # GH8645 check concat works with tuples, list, generators, and weird
        # stuff like deque and custom iterables
        df1 = DataFrame([1, 2, 3])
        df2 = DataFrame([4, 5, 6])
        expected = DataFrame([1, 2, 3, 4, 5, 6])
        tm.assert_frame_equal(concat((df1, df2), ignore_index=True), expected)
        tm.assert_frame_equal(concat([df1, df2], ignore_index=True), expected)
        tm.assert_frame_equal(
            concat((df for df in (df1, df2)), ignore_index=True), expected
        )
        tm.assert_frame_equal(concat(deque((df1, df2)), ignore_index=True), expected)

        class CustomIterator1:
            def __len__(self) -> int:
                return 2

            def __getitem__(self, index):
                try:
                    return {0: df1, 1: df2}[index]
                except KeyError as err:
                    raise IndexError from err

        tm.assert_frame_equal(pd.concat(CustomIterator1(), ignore_index=True), expected)

        class CustomIterator2(abc.Iterable):
            def __iter__(self):
                yield df1
                yield df2

        tm.assert_frame_equal(pd.concat(CustomIterator2(), ignore_index=True), expected)

    def test_concat_invalid(self):

        # trying to concat a ndframe with a non-ndframe
        df1 = tm.makeCustomDataframe(10, 2)
        for obj in [1, dict(), [1, 2], (1, 2)]:

            msg = (
                f"cannot concatenate object of type '{type(obj)}'; "
                "only Series and DataFrame objs are valid"
            )
            with pytest.raises(TypeError, match=msg):
                concat([df1, obj])

    def test_concat_invalid_first_argument(self):
        df1 = tm.makeCustomDataframe(10, 2)
        df2 = tm.makeCustomDataframe(10, 2)
        msg = (
            "first argument must be an iterable of pandas "
            'objects, you passed an object of type "DataFrame"'
        )
        with pytest.raises(TypeError, match=msg):
            concat(df1, df2)

        # generator ok though
        concat(DataFrame(np.random.rand(5, 5)) for _ in range(3))

        # text reader ok
        # GH6583
        data = """index,A,B,C,D
foo,2,3,4,5
bar,7,8,9,10
baz,12,13,14,15
qux,12,13,14,15
foo2,12,13,14,15
bar2,12,13,14,15
"""

        reader = read_csv(StringIO(data), chunksize=1)
        result = concat(reader, ignore_index=True)
        expected = read_csv(StringIO(data))
        tm.assert_frame_equal(result, expected)

    def test_concat_NaT_series(self):
        # GH 11693
        # test for merging NaT series with datetime series.
        x = Series(
            date_range("20151124 08:00", "20151124 09:00", freq="1h", tz="US/Eastern")
        )
        y = Series(pd.NaT, index=[0, 1], dtype="datetime64[ns, US/Eastern]")
        expected = Series([x[0], x[1], pd.NaT, pd.NaT])

        result = concat([x, y], ignore_index=True)
        tm.assert_series_equal(result, expected)

        # all NaT with tz
        expected = Series(pd.NaT, index=range(4), dtype="datetime64[ns, US/Eastern]")
        result = pd.concat([y, y], ignore_index=True)
        tm.assert_series_equal(result, expected)

        # without tz
        x = pd.Series(pd.date_range("20151124 08:00", "20151124 09:00", freq="1h"))
        y = pd.Series(pd.date_range("20151124 10:00", "20151124 11:00", freq="1h"))
        y[:] = pd.NaT
        expected = pd.Series([x[0], x[1], pd.NaT, pd.NaT])
        result = pd.concat([x, y], ignore_index=True)
        tm.assert_series_equal(result, expected)

        # all NaT without tz
        x[:] = pd.NaT
        expected = pd.Series(pd.NaT, index=range(4), dtype="datetime64[ns]")
        result = pd.concat([x, y], ignore_index=True)
        tm.assert_series_equal(result, expected)

    def test_concat_tz_frame(self):
        df2 = DataFrame(
            dict(
                A=pd.Timestamp("20130102", tz="US/Eastern"),
                B=pd.Timestamp("20130603", tz="CET"),
            ),
            index=range(5),
        )

        # concat
        df3 = pd.concat([df2.A.to_frame(), df2.B.to_frame()], axis=1)
        tm.assert_frame_equal(df2, df3)

    def test_concat_tz_series(self):
        # gh-11755: tz and no tz
        x = Series(date_range("20151124 08:00", "20151124 09:00", freq="1h", tz="UTC"))
        y = Series(date_range("2012-01-01", "2012-01-02"))
        expected = Series([x[0], x[1], y[0], y[1]], dtype="object")
        result = concat([x, y], ignore_index=True)
        tm.assert_series_equal(result, expected)

        # gh-11887: concat tz and object
        x = Series(date_range("20151124 08:00", "20151124 09:00", freq="1h", tz="UTC"))
        y = Series(["a", "b"])
        expected = Series([x[0], x[1], y[0], y[1]], dtype="object")
        result = concat([x, y], ignore_index=True)
        tm.assert_series_equal(result, expected)

        # see gh-12217 and gh-12306
        # Concatenating two UTC times
        first = pd.DataFrame([[datetime(2016, 1, 1)]])
        first[0] = first[0].dt.tz_localize("UTC")

        second = pd.DataFrame([[datetime(2016, 1, 2)]])
        second[0] = second[0].dt.tz_localize("UTC")

        result = pd.concat([first, second])
        assert result[0].dtype == "datetime64[ns, UTC]"

        # Concatenating two London times
        first = pd.DataFrame([[datetime(2016, 1, 1)]])
        first[0] = first[0].dt.tz_localize("Europe/London")

        second = pd.DataFrame([[datetime(2016, 1, 2)]])
        second[0] = second[0].dt.tz_localize("Europe/London")

        result = pd.concat([first, second])
        assert result[0].dtype == "datetime64[ns, Europe/London]"

        # Concatenating 2+1 London times
        first = pd.DataFrame([[datetime(2016, 1, 1)], [datetime(2016, 1, 2)]])
        first[0] = first[0].dt.tz_localize("Europe/London")

        second = pd.DataFrame([[datetime(2016, 1, 3)]])
        second[0] = second[0].dt.tz_localize("Europe/London")

        result = pd.concat([first, second])
        assert result[0].dtype == "datetime64[ns, Europe/London]"

        # Concat'ing 1+2 London times
        first = pd.DataFrame([[datetime(2016, 1, 1)]])
        first[0] = first[0].dt.tz_localize("Europe/London")

        second = pd.DataFrame([[datetime(2016, 1, 2)], [datetime(2016, 1, 3)]])
        second[0] = second[0].dt.tz_localize("Europe/London")

        result = pd.concat([first, second])
        assert result[0].dtype == "datetime64[ns, Europe/London]"

    def test_concat_tz_series_with_datetimelike(self):
        # see gh-12620: tz and timedelta
        x = [
            pd.Timestamp("2011-01-01", tz="US/Eastern"),
            pd.Timestamp("2011-02-01", tz="US/Eastern"),
        ]
        y = [pd.Timedelta("1 day"), pd.Timedelta("2 day")]
        result = concat([pd.Series(x), pd.Series(y)], ignore_index=True)
        tm.assert_series_equal(result, pd.Series(x + y, dtype="object"))

        # tz and period
        y = [pd.Period("2011-03", freq="M"), pd.Period("2011-04", freq="M")]
        result = concat([pd.Series(x), pd.Series(y)], ignore_index=True)
        tm.assert_series_equal(result, pd.Series(x + y, dtype="object"))

    def test_concat_tz_series_tzlocal(self):
        # see gh-13583
        x = [
            pd.Timestamp("2011-01-01", tz=dateutil.tz.tzlocal()),
            pd.Timestamp("2011-02-01", tz=dateutil.tz.tzlocal()),
        ]
        y = [
            pd.Timestamp("2012-01-01", tz=dateutil.tz.tzlocal()),
            pd.Timestamp("2012-02-01", tz=dateutil.tz.tzlocal()),
        ]

        result = concat([pd.Series(x), pd.Series(y)], ignore_index=True)
        tm.assert_series_equal(result, pd.Series(x + y))
        assert result.dtype == "datetime64[ns, tzlocal()]"

    @pytest.mark.parametrize("tz1", [None, "UTC"])
    @pytest.mark.parametrize("tz2", [None, "UTC"])
    @pytest.mark.parametrize("s", [pd.NaT, pd.Timestamp("20150101")])
    def test_concat_NaT_dataframes_all_NaT_axis_0(self, tz1, tz2, s):
        # GH 12396

        # tz-naive
        first = pd.DataFrame([[pd.NaT], [pd.NaT]]).apply(
            lambda x: x.dt.tz_localize(tz1)
        )
        second = pd.DataFrame([s]).apply(lambda x: x.dt.tz_localize(tz2))

        result = pd.concat([first, second], axis=0)
        expected = pd.DataFrame(pd.Series([pd.NaT, pd.NaT, s], index=[0, 1, 0]))
        expected = expected.apply(lambda x: x.dt.tz_localize(tz2))
        if tz1 != tz2:
            expected = expected.astype(object)

        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize("tz1", [None, "UTC"])
    @pytest.mark.parametrize("tz2", [None, "UTC"])
    def test_concat_NaT_dataframes_all_NaT_axis_1(self, tz1, tz2):
        # GH 12396

        first = pd.DataFrame(pd.Series([pd.NaT, pd.NaT]).dt.tz_localize(tz1))
        second = pd.DataFrame(pd.Series([pd.NaT]).dt.tz_localize(tz2), columns=[1])
        expected = pd.DataFrame(
            {
                0: pd.Series([pd.NaT, pd.NaT]).dt.tz_localize(tz1),
                1: pd.Series([pd.NaT, pd.NaT]).dt.tz_localize(tz2),
            }
        )
        result = pd.concat([first, second], axis=1)
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize("tz1", [None, "UTC"])
    @pytest.mark.parametrize("tz2", [None, "UTC"])
    def test_concat_NaT_series_dataframe_all_NaT(self, tz1, tz2):
        # GH 12396

        # tz-naive
        first = pd.Series([pd.NaT, pd.NaT]).dt.tz_localize(tz1)
        second = pd.DataFrame(
            [
                [pd.Timestamp("2015/01/01", tz=tz2)],
                [pd.Timestamp("2016/01/01", tz=tz2)],
            ],
            index=[2, 3],
        )

        expected = pd.DataFrame(
            [
                pd.NaT,
                pd.NaT,
                pd.Timestamp("2015/01/01", tz=tz2),
                pd.Timestamp("2016/01/01", tz=tz2),
            ]
        )
        if tz1 != tz2:
            expected = expected.astype(object)

        result = pd.concat([first, second])
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize("tz", [None, "UTC"])
    def test_concat_NaT_dataframes(self, tz):
        # GH 12396

        first = pd.DataFrame([[pd.NaT], [pd.NaT]])
        first = first.apply(lambda x: x.dt.tz_localize(tz))
        second = pd.DataFrame(
            [[pd.Timestamp("2015/01/01", tz=tz)], [pd.Timestamp("2016/01/01", tz=tz)]],
            index=[2, 3],
        )
        expected = pd.DataFrame(
            [
                pd.NaT,
                pd.NaT,
                pd.Timestamp("2015/01/01", tz=tz),
                pd.Timestamp("2016/01/01", tz=tz),
            ]
        )

        result = pd.concat([first, second], axis=0)
        tm.assert_frame_equal(result, expected)

    def test_concat_period_series(self):
        x = Series(pd.PeriodIndex(["2015-11-01", "2015-12-01"], freq="D"))
        y = Series(pd.PeriodIndex(["2015-10-01", "2016-01-01"], freq="D"))
        expected = Series([x[0], x[1], y[0], y[1]], dtype="Period[D]")
        result = concat([x, y], ignore_index=True)
        tm.assert_series_equal(result, expected)

    def test_concat_period_multiple_freq_series(self):
        x = Series(pd.PeriodIndex(["2015-11-01", "2015-12-01"], freq="D"))
        y = Series(pd.PeriodIndex(["2015-10-01", "2016-01-01"], freq="M"))
        expected = Series([x[0], x[1], y[0], y[1]], dtype="object")
        result = concat([x, y], ignore_index=True)
        tm.assert_series_equal(result, expected)
        assert result.dtype == "object"

    def test_concat_period_other_series(self):
        x = Series(pd.PeriodIndex(["2015-11-01", "2015-12-01"], freq="D"))
        y = Series(pd.PeriodIndex(["2015-11-01", "2015-12-01"], freq="M"))
        expected = Series([x[0], x[1], y[0], y[1]], dtype="object")
        result = concat([x, y], ignore_index=True)
        tm.assert_series_equal(result, expected)
        assert result.dtype == "object"

        # non-period
        x = Series(pd.PeriodIndex(["2015-11-01", "2015-12-01"], freq="D"))
        y = Series(pd.DatetimeIndex(["2015-11-01", "2015-12-01"]))
        expected = Series([x[0], x[1], y[0], y[1]], dtype="object")
        result = concat([x, y], ignore_index=True)
        tm.assert_series_equal(result, expected)
        assert result.dtype == "object"

        x = Series(pd.PeriodIndex(["2015-11-01", "2015-12-01"], freq="D"))
        y = Series(["A", "B"])
        expected = Series([x[0], x[1], y[0], y[1]], dtype="object")
        result = concat([x, y], ignore_index=True)
        tm.assert_series_equal(result, expected)
        assert result.dtype == "object"

    def test_concat_empty_series(self):
        # GH 11082
        s1 = pd.Series([1, 2, 3], name="x")
        s2 = pd.Series(name="y", dtype="float64")
        res = pd.concat([s1, s2], axis=1)
        exp = pd.DataFrame(
            {"x": [1, 2, 3], "y": [np.nan, np.nan, np.nan]},
            index=pd.Index([0, 1, 2], dtype="O"),
        )
        tm.assert_frame_equal(res, exp)

        s1 = pd.Series([1, 2, 3], name="x")
        s2 = pd.Series(name="y", dtype="float64")
        res = pd.concat([s1, s2], axis=0)
        # name will be reset
        exp = pd.Series([1, 2, 3])
        tm.assert_series_equal(res, exp)

        # empty Series with no name
        s1 = pd.Series([1, 2, 3], name="x")
        s2 = pd.Series(name=None, dtype="float64")
        res = pd.concat([s1, s2], axis=1)
        exp = pd.DataFrame(
            {"x": [1, 2, 3], 0: [np.nan, np.nan, np.nan]},
            columns=["x", 0],
            index=pd.Index([0, 1, 2], dtype="O"),
        )
        tm.assert_frame_equal(res, exp)

    @pytest.mark.parametrize("tz", [None, "UTC"])
    @pytest.mark.parametrize("values", [[], [1, 2, 3]])
    def test_concat_empty_series_timelike(self, tz, values):
        # GH 18447

        first = Series([], dtype="M8[ns]").dt.tz_localize(tz)
        dtype = None if values else np.float64
        second = Series(values, dtype=dtype)

        expected = DataFrame(
            {
                0: pd.Series([pd.NaT] * len(values), dtype="M8[ns]").dt.tz_localize(tz),
                1: values,
            }
        )
        result = concat([first, second], axis=1)
        tm.assert_frame_equal(result, expected)

    def test_default_index(self):
        # is_series and ignore_index
        s1 = pd.Series([1, 2, 3], name="x")
        s2 = pd.Series([4, 5, 6], name="y")
        res = pd.concat([s1, s2], axis=1, ignore_index=True)
        assert isinstance(res.columns, pd.RangeIndex)
        exp = pd.DataFrame([[1, 4], [2, 5], [3, 6]])
        # use check_index_type=True to check the result have
        # RangeIndex (default index)
        tm.assert_frame_equal(res, exp, check_index_type=True, check_column_type=True)

        # is_series and all inputs have no names
        s1 = pd.Series([1, 2, 3])
        s2 = pd.Series([4, 5, 6])
        res = pd.concat([s1, s2], axis=1, ignore_index=False)
        assert isinstance(res.columns, pd.RangeIndex)
        exp = pd.DataFrame([[1, 4], [2, 5], [3, 6]])
        exp.columns = pd.RangeIndex(2)
        tm.assert_frame_equal(res, exp, check_index_type=True, check_column_type=True)

        # is_dataframe and ignore_index
        df1 = pd.DataFrame({"A": [1, 2], "B": [5, 6]})
        df2 = pd.DataFrame({"A": [3, 4], "B": [7, 8]})

        res = pd.concat([df1, df2], axis=0, ignore_index=True)
        exp = pd.DataFrame([[1, 5], [2, 6], [3, 7], [4, 8]], columns=["A", "B"])
        tm.assert_frame_equal(res, exp, check_index_type=True, check_column_type=True)

        res = pd.concat([df1, df2], axis=1, ignore_index=True)
        exp = pd.DataFrame([[1, 5, 3, 7], [2, 6, 4, 8]])
        tm.assert_frame_equal(res, exp, check_index_type=True, check_column_type=True)

    def test_concat_multiindex_rangeindex(self):
        # GH13542
        # when multi-index levels are RangeIndex objects
        # there is a bug in concat with objects of len 1

        df = DataFrame(np.random.randn(9, 2))
        df.index = MultiIndex(
            levels=[pd.RangeIndex(3), pd.RangeIndex(3)],
            codes=[np.repeat(np.arange(3), 3), np.tile(np.arange(3), 3)],
        )

        res = concat([df.iloc[[2, 3, 4], :], df.iloc[[5], :]])
        exp = df.iloc[[2, 3, 4, 5], :]
        tm.assert_frame_equal(res, exp)

    def test_concat_multiindex_dfs_with_deepcopy(self):
        # GH 9967
        from copy import deepcopy

        example_multiindex1 = pd.MultiIndex.from_product([["a"], ["b"]])
        example_dataframe1 = pd.DataFrame([0], index=example_multiindex1)

        example_multiindex2 = pd.MultiIndex.from_product([["a"], ["c"]])
        example_dataframe2 = pd.DataFrame([1], index=example_multiindex2)

        example_dict = {"s1": example_dataframe1, "s2": example_dataframe2}
        expected_index = pd.MultiIndex(
            levels=[["s1", "s2"], ["a"], ["b", "c"]],
            codes=[[0, 1], [0, 0], [0, 1]],
            names=["testname", None, None],
        )
        expected = pd.DataFrame([[0], [1]], index=expected_index)
        result_copy = pd.concat(deepcopy(example_dict), names=["testname"])
        tm.assert_frame_equal(result_copy, expected)
        result_no_copy = pd.concat(example_dict, names=["testname"])
        tm.assert_frame_equal(result_no_copy, expected)

    def test_categorical_concat_append(self):
        cat = Categorical(["a", "b"], categories=["a", "b"])
        vals = [1, 2]
        df = DataFrame({"cats": cat, "vals": vals})
        cat2 = Categorical(["a", "b", "a", "b"], categories=["a", "b"])
        vals2 = [1, 2, 1, 2]
        exp = DataFrame({"cats": cat2, "vals": vals2}, index=Index([0, 1, 0, 1]))

        tm.assert_frame_equal(pd.concat([df, df]), exp)
        tm.assert_frame_equal(df.append(df), exp)

        # GH 13524 can concat different categories
        cat3 = Categorical(["a", "b"], categories=["a", "b", "c"])
        vals3 = [1, 2]
        df_different_categories = DataFrame({"cats": cat3, "vals": vals3})

        res = pd.concat([df, df_different_categories], ignore_index=True)
        exp = DataFrame({"cats": list("abab"), "vals": [1, 2, 1, 2]})
        tm.assert_frame_equal(res, exp)

        res = df.append(df_different_categories, ignore_index=True)
        tm.assert_frame_equal(res, exp)

    def test_categorical_concat_dtypes(self):

        # GH8143
        index = ["cat", "obj", "num"]
        cat = Categorical(["a", "b", "c"])
        obj = Series(["a", "b", "c"])
        num = Series([1, 2, 3])
        df = pd.concat([Series(cat), obj, num], axis=1, keys=index)

        result = df.dtypes == "object"
        expected = Series([False, True, False], index=index)
        tm.assert_series_equal(result, expected)

        result = df.dtypes == "int64"
        expected = Series([False, False, True], index=index)
        tm.assert_series_equal(result, expected)

        result = df.dtypes == "category"
        expected = Series([True, False, False], index=index)
        tm.assert_series_equal(result, expected)

    def test_categorical_concat(self, sort):
        # See GH 10177
        df1 = DataFrame(
            np.arange(18, dtype="int64").reshape(6, 3), columns=["a", "b", "c"]
        )

        df2 = DataFrame(np.arange(14, dtype="int64").reshape(7, 2), columns=["a", "c"])

        cat_values = ["one", "one", "two", "one", "two", "two", "one"]
        df2["h"] = Series(Categorical(cat_values))

        res = pd.concat((df1, df2), axis=0, ignore_index=True, sort=sort)
        exp = DataFrame(
            {
                "a": [0, 3, 6, 9, 12, 15, 0, 2, 4, 6, 8, 10, 12],
                "b": [
                    1,
                    4,
                    7,
                    10,
                    13,
                    16,
                    np.nan,
                    np.nan,
                    np.nan,
                    np.nan,
                    np.nan,
                    np.nan,
                    np.nan,
                ],
                "c": [2, 5, 8, 11, 14, 17, 1, 3, 5, 7, 9, 11, 13],
                "h": [None] * 6 + cat_values,
            }
        )
        tm.assert_frame_equal(res, exp)

    def test_categorical_concat_gh7864(self):
        # GH 7864
        # make sure ordering is preserved
        df = DataFrame({"id": [1, 2, 3, 4, 5, 6], "raw_grade": list("abbaae")})
        df["grade"] = Categorical(df["raw_grade"])
        df["grade"].cat.set_categories(["e", "a", "b"])

        df1 = df[0:3]
        df2 = df[3:]

        tm.assert_index_equal(df["grade"].cat.categories, df1["grade"].cat.categories)
        tm.assert_index_equal(df["grade"].cat.categories, df2["grade"].cat.categories)

        dfx = pd.concat([df1, df2])
        tm.assert_index_equal(df["grade"].cat.categories, dfx["grade"].cat.categories)

        dfa = df1.append(df2)
        tm.assert_index_equal(df["grade"].cat.categories, dfa["grade"].cat.categories)

    def test_categorical_concat_preserve(self):

        # GH 8641  series concat not preserving category dtype
        # GH 13524 can concat different categories
        s = Series(list("abc"), dtype="category")
        s2 = Series(list("abd"), dtype="category")

        exp = Series(list("abcabd"))
        res = pd.concat([s, s2], ignore_index=True)
        tm.assert_series_equal(res, exp)

        exp = Series(list("abcabc"), dtype="category")
        res = pd.concat([s, s], ignore_index=True)
        tm.assert_series_equal(res, exp)

        exp = Series(list("abcabc"), index=[0, 1, 2, 0, 1, 2], dtype="category")
        res = pd.concat([s, s])
        tm.assert_series_equal(res, exp)

        a = Series(np.arange(6, dtype="int64"))
        b = Series(list("aabbca"))

        df2 = DataFrame({"A": a, "B": b.astype(CategoricalDtype(list("cab")))})
        res = pd.concat([df2, df2])
        exp = DataFrame(
            {
                "A": pd.concat([a, a]),
                "B": pd.concat([b, b]).astype(CategoricalDtype(list("cab"))),
            }
        )
        tm.assert_frame_equal(res, exp)

    def test_categorical_index_preserver(self):

        a = Series(np.arange(6, dtype="int64"))
        b = Series(list("aabbca"))

        df2 = DataFrame(
            {"A": a, "B": b.astype(CategoricalDtype(list("cab")))}
        ).set_index("B")
        result = pd.concat([df2, df2])
        expected = DataFrame(
            {
                "A": pd.concat([a, a]),
                "B": pd.concat([b, b]).astype(CategoricalDtype(list("cab"))),
            }
        ).set_index("B")
        tm.assert_frame_equal(result, expected)

        # wrong categories
        df3 = DataFrame(
            {"A": a, "B": Categorical(b, categories=list("abe"))}
        ).set_index("B")
        msg = "categories must match existing categories when appending"
        with pytest.raises(TypeError, match=msg):
            pd.concat([df2, df3])

    def test_concat_categoricalindex(self):
        # GH 16111, categories that aren't lexsorted
        categories = [9, 0, 1, 2, 3]

        a = pd.Series(1, index=pd.CategoricalIndex([9, 0], categories=categories))
        b = pd.Series(2, index=pd.CategoricalIndex([0, 1], categories=categories))
        c = pd.Series(3, index=pd.CategoricalIndex([1, 2], categories=categories))

        result = pd.concat([a, b, c], axis=1)

        exp_idx = pd.CategoricalIndex([9, 0, 1, 2], categories=categories)
        exp = pd.DataFrame(
            {
                0: [1, 1, np.nan, np.nan],
                1: [np.nan, 2, 2, np.nan],
                2: [np.nan, np.nan, 3, 3],
            },
            columns=[0, 1, 2],
            index=exp_idx,
        )
        tm.assert_frame_equal(result, exp)

    def test_concat_order(self):
        # GH 17344
        dfs = [pd.DataFrame(index=range(3), columns=["a", 1, None])]
        dfs += [
            pd.DataFrame(index=range(3), columns=[None, 1, "a"]) for i in range(100)
        ]

        result = pd.concat(dfs, sort=True).columns
        expected = dfs[0].columns
        tm.assert_index_equal(result, expected)

    def test_concat_datetime_timezone(self):
        # GH 18523
        idx1 = pd.date_range("2011-01-01", periods=3, freq="H", tz="Europe/Paris")
        idx2 = pd.date_range(start=idx1[0], end=idx1[-1], freq="H")
        df1 = pd.DataFrame({"a": [1, 2, 3]}, index=idx1)
        df2 = pd.DataFrame({"b": [1, 2, 3]}, index=idx2)
        result = pd.concat([df1, df2], axis=1)

        exp_idx = (
            DatetimeIndex(
                [
                    "2011-01-01 00:00:00+01:00",
                    "2011-01-01 01:00:00+01:00",
                    "2011-01-01 02:00:00+01:00",
                ],
                freq="H",
            )
            .tz_convert("UTC")
            .tz_convert("Europe/Paris")
        )

        expected = pd.DataFrame(
            [[1, 1], [2, 2], [3, 3]], index=exp_idx, columns=["a", "b"]
        )

        tm.assert_frame_equal(result, expected)

        idx3 = pd.date_range("2011-01-01", periods=3, freq="H", tz="Asia/Tokyo")
        df3 = pd.DataFrame({"b": [1, 2, 3]}, index=idx3)
        result = pd.concat([df1, df3], axis=1)

        exp_idx = DatetimeIndex(
            [
                "2010-12-31 15:00:00+00:00",
                "2010-12-31 16:00:00+00:00",
                "2010-12-31 17:00:00+00:00",
                "2010-12-31 23:00:00+00:00",
                "2011-01-01 00:00:00+00:00",
                "2011-01-01 01:00:00+00:00",
            ]
        )

        expected = pd.DataFrame(
            [
                [np.nan, 1],
                [np.nan, 2],
                [np.nan, 3],
                [1, np.nan],
                [2, np.nan],
                [3, np.nan],
            ],
            index=exp_idx,
            columns=["a", "b"],
        )

        tm.assert_frame_equal(result, expected)

        # GH 13783: Concat after resample
        result = pd.concat(
            [df1.resample("H").mean(), df2.resample("H").mean()], sort=True
        )
        expected = pd.DataFrame(
            {"a": [1, 2, 3] + [np.nan] * 3, "b": [np.nan] * 3 + [1, 2, 3]},
            index=idx1.append(idx1),
        )
        tm.assert_frame_equal(result, expected)

    def test_concat_different_extension_dtypes_upcasts(self):
        a = pd.Series(pd.core.arrays.integer_array([1, 2]))
        b = pd.Series(to_decimal([1, 2]))

        result = pd.concat([a, b], ignore_index=True)
        expected = pd.Series([1, 2, Decimal(1), Decimal(2)], dtype=object)
        tm.assert_series_equal(result, expected)

    def test_concat_odered_dict(self):
        # GH 21510
        expected = pd.concat(
            [pd.Series(range(3)), pd.Series(range(4))], keys=["First", "Another"]
        )
        result = pd.concat(
            OrderedDict(
                [("First", pd.Series(range(3))), ("Another", pd.Series(range(4)))]
            )
        )
        tm.assert_series_equal(result, expected)


@pytest.mark.parametrize("pdt", [pd.Series, pd.DataFrame])
@pytest.mark.parametrize("dt", np.sctypes["float"])
def test_concat_no_unnecessary_upcast(dt, pdt):
    # GH 13247
    dims = pdt(dtype=object).ndim

    dfs = [
        pdt(np.array([1], dtype=dt, ndmin=dims)),
        pdt(np.array([np.nan], dtype=dt, ndmin=dims)),
        pdt(np.array([5], dtype=dt, ndmin=dims)),
    ]
    x = pd.concat(dfs)
    assert x.values.dtype == dt


@pytest.mark.parametrize("pdt", [create_series_with_explicit_dtype, pd.DataFrame])
@pytest.mark.parametrize("dt", np.sctypes["int"])
def test_concat_will_upcast(dt, pdt):
    with catch_warnings(record=True):
        dims = pdt().ndim
        dfs = [
            pdt(np.array([1], dtype=dt, ndmin=dims)),
            pdt(np.array([np.nan], ndmin=dims)),
            pdt(np.array([5], dtype=dt, ndmin=dims)),
        ]
        x = pd.concat(dfs)
        assert x.values.dtype == "float64"


def test_concat_empty_and_non_empty_frame_regression():
    # GH 18178 regression test
    df1 = pd.DataFrame({"foo": [1]})
    df2 = pd.DataFrame({"foo": []})
    expected = pd.DataFrame({"foo": [1.0]})
    result = pd.concat([df1, df2])
    tm.assert_frame_equal(result, expected)


def test_concat_empty_and_non_empty_series_regression():
    # GH 18187 regression test
    s1 = pd.Series([1])
    s2 = pd.Series([], dtype=object)

    expected = s1
    result = pd.concat([s1, s2])
    tm.assert_series_equal(result, expected)


def test_concat_sorts_columns(sort):
    # GH-4588
    df1 = pd.DataFrame({"a": [1, 2], "b": [1, 2]}, columns=["b", "a"])
    df2 = pd.DataFrame({"a": [3, 4], "c": [5, 6]})

    # for sort=True/None
    expected = pd.DataFrame(
        {"a": [1, 2, 3, 4], "b": [1, 2, None, None], "c": [None, None, 5, 6]},
        columns=["a", "b", "c"],
    )

    if sort is False:
        expected = expected[["b", "a", "c"]]

    # default
    with tm.assert_produces_warning(None):
        result = pd.concat([df1, df2], ignore_index=True, sort=sort)
    tm.assert_frame_equal(result, expected)


def test_concat_sorts_index(sort):
    df1 = pd.DataFrame({"a": [1, 2, 3]}, index=["c", "a", "b"])
    df2 = pd.DataFrame({"b": [1, 2]}, index=["a", "b"])

    # For True/None
    expected = pd.DataFrame(
        {"a": [2, 3, 1], "b": [1, 2, None]}, index=["a", "b", "c"], columns=["a", "b"]
    )
    if sort is False:
        expected = expected.loc[["c", "a", "b"]]

    # Warn and sort by default
    with tm.assert_produces_warning(None):
        result = pd.concat([df1, df2], axis=1, sort=sort)
    tm.assert_frame_equal(result, expected)


def test_concat_inner_sort(sort):
    # https://github.com/pandas-dev/pandas/pull/20613
    df1 = pd.DataFrame({"a": [1, 2], "b": [1, 2], "c": [1, 2]}, columns=["b", "a", "c"])
    df2 = pd.DataFrame({"a": [1, 2], "b": [3, 4]}, index=[3, 4])

    with tm.assert_produces_warning(None):
        # unset sort should *not* warn for inner join
        # since that never sorted
        result = pd.concat([df1, df2], sort=sort, join="inner", ignore_index=True)

    expected = pd.DataFrame({"b": [1, 2, 3, 4], "a": [1, 2, 1, 2]}, columns=["b", "a"])
    if sort is True:
        expected = expected[["a", "b"]]
    tm.assert_frame_equal(result, expected)


def test_concat_aligned_sort():
    # GH-4588
    df = pd.DataFrame({"c": [1, 2], "b": [3, 4], "a": [5, 6]}, columns=["c", "b", "a"])
    result = pd.concat([df, df], sort=True, ignore_index=True)
    expected = pd.DataFrame(
        {"a": [5, 6, 5, 6], "b": [3, 4, 3, 4], "c": [1, 2, 1, 2]},
        columns=["a", "b", "c"],
    )
    tm.assert_frame_equal(result, expected)

    result = pd.concat([df, df[["c", "b"]]], join="inner", sort=True, ignore_index=True)
    expected = expected[["b", "c"]]
    tm.assert_frame_equal(result, expected)


def test_concat_aligned_sort_does_not_raise():
    # GH-4588
    # We catch TypeErrors from sorting internally and do not re-raise.
    df = pd.DataFrame({1: [1, 2], "a": [3, 4]}, columns=[1, "a"])
    expected = pd.DataFrame({1: [1, 2, 1, 2], "a": [3, 4, 3, 4]}, columns=[1, "a"])
    result = pd.concat([df, df], ignore_index=True, sort=True)
    tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize("s1name,s2name", [(np.int64(190), (43, 0)), (190, (43, 0))])
def test_concat_series_name_npscalar_tuple(s1name, s2name):
    # GH21015
    s1 = pd.Series({"a": 1, "b": 2}, name=s1name)
    s2 = pd.Series({"c": 5, "d": 6}, name=s2name)
    result = pd.concat([s1, s2])
    expected = pd.Series({"a": 1, "b": 2, "c": 5, "d": 6})
    tm.assert_series_equal(result, expected)


def test_concat_categorical_tz():
    # GH-23816
    a = pd.Series(pd.date_range("2017-01-01", periods=2, tz="US/Pacific"))
    b = pd.Series(["a", "b"], dtype="category")
    result = pd.concat([a, b], ignore_index=True)
    expected = pd.Series(
        [
            pd.Timestamp("2017-01-01", tz="US/Pacific"),
            pd.Timestamp("2017-01-02", tz="US/Pacific"),
            "a",
            "b",
        ]
    )
    tm.assert_series_equal(result, expected)


def test_concat_categorical_unchanged():
    # GH-12007
    # test fix for when concat on categorical and float
    # coerces dtype categorical -> float
    df = pd.DataFrame(pd.Series(["a", "b", "c"], dtype="category", name="A"))
    ser = pd.Series([0, 1, 2], index=[0, 1, 3], name="B")
    result = pd.concat([df, ser], axis=1)
    expected = pd.DataFrame(
        {
            "A": pd.Series(["a", "b", "c", np.nan], dtype="category"),
            "B": pd.Series([0, 1, np.nan, 2], dtype="float"),
        }
    )
    tm.assert_equal(result, expected)


def test_concat_datetimeindex_freq():
    # GH 3232
    # Monotonic index result
    dr = pd.date_range("01-Jan-2013", periods=100, freq="50L", tz="UTC")
    data = list(range(100))
    expected = pd.DataFrame(data, index=dr)
    result = pd.concat([expected[:50], expected[50:]])
    tm.assert_frame_equal(result, expected)

    # Non-monotonic index result
    result = pd.concat([expected[50:], expected[:50]])
    expected = pd.DataFrame(data[50:] + data[:50], index=dr[50:].append(dr[:50]))
    expected.index._data.freq = None
    tm.assert_frame_equal(result, expected)


def test_concat_empty_df_object_dtype():
    # GH 9149
    df_1 = pd.DataFrame({"Row": [0, 1, 1], "EmptyCol": np.nan, "NumberCol": [1, 2, 3]})
    df_2 = pd.DataFrame(columns=df_1.columns)
    result = pd.concat([df_1, df_2], axis=0)
    expected = df_1.astype(object)
    tm.assert_frame_equal(result, expected)


def test_concat_sparse():
    # GH 23557
    a = pd.Series(SparseArray([0, 1, 2]))
    expected = pd.DataFrame(data=[[0, 0], [1, 1], [2, 2]]).astype(
        pd.SparseDtype(np.int64, 0)
    )
    result = pd.concat([a, a], axis=1)
    tm.assert_frame_equal(result, expected)


def test_concat_dense_sparse():
    # GH 30668
    a = pd.Series(pd.arrays.SparseArray([1, None]), dtype=float)
    b = pd.Series([1], dtype=float)
    expected = pd.Series(data=[1, None, 1], index=[0, 1, 0]).astype(
        pd.SparseDtype(np.float64, None)
    )
    result = pd.concat([a, b], axis=0)
    tm.assert_series_equal(result, expected)


@pytest.mark.parametrize("test_series", [True, False])
def test_concat_copy_index(test_series, axis):
    # GH 29879
    if test_series:
        ser = Series([1, 2])
        comb = concat([ser, ser], axis=axis, copy=True)
        assert comb.index is not ser.index
    else:
        df = DataFrame([[1, 2], [3, 4]], columns=["a", "b"])
        comb = concat([df, df], axis=axis, copy=True)
        assert comb.index is not df.index
        assert comb.columns is not df.columns


def test_concat_multiindex_datetime_object_index():
    # https://github.com/pandas-dev/pandas/issues/11058
    s = Series(
        ["a", "b"],
        index=MultiIndex.from_arrays(
            [[1, 2], Index([dt.date(2013, 1, 1), dt.date(2014, 1, 1)], dtype="object")],
            names=["first", "second"],
        ),
    )
    s2 = Series(
        ["a", "b"],
        index=MultiIndex.from_arrays(
            [[1, 2], Index([dt.date(2013, 1, 1), dt.date(2015, 1, 1)], dtype="object")],
            names=["first", "second"],
        ),
    )
    expected = DataFrame(
        [["a", "a"], ["b", np.nan], [np.nan, "b"]],
        index=MultiIndex.from_arrays(
            [
                [1, 2, 2],
                DatetimeIndex(
                    ["2013-01-01", "2014-01-01", "2015-01-01"],
                    dtype="datetime64[ns]",
                    freq=None,
                ),
            ],
            names=["first", "second"],
        ),
    )
    result = concat([s, s2], axis=1)
    tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize("keys", [["e", "f", "f"], ["f", "e", "f"]])
def test_duplicate_keys(keys):
    # GH 33654
    df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    s1 = Series([7, 8, 9], name="c")
    s2 = Series([10, 11, 12], name="d")
    result = concat([df, s1, s2], axis=1, keys=keys)
    expected_values = [[1, 4, 7, 10], [2, 5, 8, 11], [3, 6, 9, 12]]
    expected_columns = pd.MultiIndex.from_tuples(
        [(keys[0], "a"), (keys[0], "b"), (keys[1], "c"), (keys[2], "d")]
    )
    expected = DataFrame(expected_values, columns=expected_columns)
    tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize(
    "obj",
    [
        tm.SubclassedDataFrame({"A": np.arange(0, 10)}),
        tm.SubclassedSeries(np.arange(0, 10), name="A"),
    ],
)
def test_concat_preserves_subclass(obj):
    # GH28330 -- preserve subclass

    result = concat([obj, obj])
    assert isinstance(result, type(obj))


def test_concat_frame_axis0_extension_dtypes():
    # preserve extension dtype (through common_dtype mechanism)
    df1 = pd.DataFrame({"a": pd.array([1, 2, 3], dtype="Int64")})
    df2 = pd.DataFrame({"a": np.array([4, 5, 6])})

    result = pd.concat([df1, df2], ignore_index=True)
    expected = pd.DataFrame({"a": [1, 2, 3, 4, 5, 6]}, dtype="Int64")
    tm.assert_frame_equal(result, expected)

    result = pd.concat([df2, df1], ignore_index=True)
    expected = pd.DataFrame({"a": [4, 5, 6, 1, 2, 3]}, dtype="Int64")
    tm.assert_frame_equal(result, expected)
alkaline-ml / pandas python

Version: 1.1.1

/ tests / reshape / test_concat.py

Products

About

Resources

Contact Gemfury