Why Gemfury? Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

aaronreidsmith / pandas   python

Repository URL to install this package:

Version: 0.25.3 

/ tests / io / formats / test_to_latex.py

import codecs
from datetime import datetime

import pytest

import pandas as pd
from pandas import DataFrame, Series
from pandas.util import testing as tm


class TestToLatex:
    def test_to_latex_filename(self, float_frame):
        with tm.ensure_clean("test.tex") as path:
            float_frame.to_latex(path)

            with open(path, "r") as f:
                assert float_frame.to_latex() == f.read()

        # test with utf-8 and encoding option (GH 7061)
        df = DataFrame([["au\xdfgangen"]])
        with tm.ensure_clean("test.tex") as path:
            df.to_latex(path, encoding="utf-8")
            with codecs.open(path, "r", encoding="utf-8") as f:
                assert df.to_latex() == f.read()

        # test with utf-8 without encoding option
        with tm.ensure_clean("test.tex") as path:
            df.to_latex(path)
            with codecs.open(path, "r", encoding="utf-8") as f:
                assert df.to_latex() == f.read()

    def test_to_latex(self, float_frame):
        # it works!
        float_frame.to_latex()

        df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]})
        withindex_result = df.to_latex()
        withindex_expected = r"""\begin{tabular}{lrl}
\toprule
{} &  a &   b \\
\midrule
0 &  1 &  b1 \\
1 &  2 &  b2 \\
\bottomrule
\end{tabular}
"""

        assert withindex_result == withindex_expected

        withoutindex_result = df.to_latex(index=False)
        withoutindex_expected = r"""\begin{tabular}{rl}
\toprule
 a &   b \\
\midrule
 1 &  b1 \\
 2 &  b2 \\
\bottomrule
\end{tabular}
"""

        assert withoutindex_result == withoutindex_expected

    def test_to_latex_format(self, float_frame):
        # GH Bug #9402
        float_frame.to_latex(column_format="ccc")

        df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]})
        withindex_result = df.to_latex(column_format="ccc")
        withindex_expected = r"""\begin{tabular}{ccc}
\toprule
{} &  a &   b \\
\midrule
0 &  1 &  b1 \\
1 &  2 &  b2 \\
\bottomrule
\end{tabular}
"""

        assert withindex_result == withindex_expected

    def test_to_latex_empty(self):
        df = DataFrame()
        result = df.to_latex()
        expected = r"""\begin{tabular}{l}
\toprule
Empty DataFrame
Columns: Index([], dtype='object')
Index: Index([], dtype='object') \\
\bottomrule
\end{tabular}
"""
        assert result == expected

        result = df.to_latex(longtable=True)
        expected = r"""\begin{longtable}{l}
\toprule
Empty DataFrame
Columns: Index([], dtype='object')
Index: Index([], dtype='object') \\
\end{longtable}
"""
        assert result == expected

    def test_to_latex_with_formatters(self):
        df = DataFrame(
            {
                "datetime64": [
                    datetime(2016, 1, 1),
                    datetime(2016, 2, 5),
                    datetime(2016, 3, 3),
                ],
                "float": [1.0, 2.0, 3.0],
                "int": [1, 2, 3],
                "object": [(1, 2), True, False],
            }
        )

        formatters = {
            "datetime64": lambda x: x.strftime("%Y-%m"),
            "float": lambda x: "[{x: 4.1f}]".format(x=x),
            "int": lambda x: "0x{x:x}".format(x=x),
            "object": lambda x: "-{x!s}-".format(x=x),
            "__index__": lambda x: "index: {x}".format(x=x),
        }
        result = df.to_latex(formatters=dict(formatters))

        expected = r"""\begin{tabular}{llrrl}
\toprule
{} & datetime64 &  float & int &    object \\
\midrule
index: 0 &    2016-01 & [ 1.0] & 0x1 &  -(1, 2)- \\
index: 1 &    2016-02 & [ 2.0] & 0x2 &    -True- \\
index: 2 &    2016-03 & [ 3.0] & 0x3 &   -False- \\
\bottomrule
\end{tabular}
"""
        assert result == expected

    def test_to_latex_multiindex(self):
        df = DataFrame({("x", "y"): ["a"]})
        result = df.to_latex()
        expected = r"""\begin{tabular}{ll}
\toprule
{} &  x \\
{} &  y \\
\midrule
0 &  a \\
\bottomrule
\end{tabular}
"""

        assert result == expected

        result = df.T.to_latex()
        expected = r"""\begin{tabular}{lll}
\toprule
  &   &  0 \\
\midrule
x & y &  a \\
\bottomrule
\end{tabular}
"""

        assert result == expected

        df = DataFrame.from_dict(
            {
                ("c1", 0): pd.Series({x: x for x in range(4)}),
                ("c1", 1): pd.Series({x: x + 4 for x in range(4)}),
                ("c2", 0): pd.Series({x: x for x in range(4)}),
                ("c2", 1): pd.Series({x: x + 4 for x in range(4)}),
                ("c3", 0): pd.Series({x: x for x in range(4)}),
            }
        ).T
        result = df.to_latex()
        expected = r"""\begin{tabular}{llrrrr}
\toprule
   &   &  0 &  1 &  2 &  3 \\
\midrule
c1 & 0 &  0 &  1 &  2 &  3 \\
   & 1 &  4 &  5 &  6 &  7 \\
c2 & 0 &  0 &  1 &  2 &  3 \\
   & 1 &  4 &  5 &  6 &  7 \\
c3 & 0 &  0 &  1 &  2 &  3 \\
\bottomrule
\end{tabular}
"""

        assert result == expected

        # GH 14184
        df = df.T
        df.columns.names = ["a", "b"]
        result = df.to_latex()
        expected = r"""\begin{tabular}{lrrrrr}
\toprule
a & \multicolumn{2}{l}{c1} & \multicolumn{2}{l}{c2} & c3 \\
b &  0 &  1 &  0 &  1 &  0 \\
\midrule
0 &  0 &  4 &  0 &  4 &  0 \\
1 &  1 &  5 &  1 &  5 &  1 \\
2 &  2 &  6 &  2 &  6 &  2 \\
3 &  3 &  7 &  3 &  7 &  3 \\
\bottomrule
\end{tabular}
"""
        assert result == expected

        # GH 10660
        df = pd.DataFrame({"a": [0, 0, 1, 1], "b": list("abab"), "c": [1, 2, 3, 4]})
        result = df.set_index(["a", "b"]).to_latex()
        expected = r"""\begin{tabular}{llr}
\toprule
  &   &  c \\
a & b &    \\
\midrule
0 & a &  1 \\
  & b &  2 \\
1 & a &  3 \\
  & b &  4 \\
\bottomrule
\end{tabular}
"""

        assert result == expected

        result = df.groupby("a").describe().to_latex()
        expected = r"""\begin{tabular}{lrrrrrrrr}
\toprule
{} & \multicolumn{8}{l}{c} \\
{} & count & mean &       std &  min &   25\% &  50\% &   75\% &  max \\
a &       &      &           &      &       &      &       &      \\
\midrule
0 &   2.0 &  1.5 &  0.707107 &  1.0 &  1.25 &  1.5 &  1.75 &  2.0 \\
1 &   2.0 &  3.5 &  0.707107 &  3.0 &  3.25 &  3.5 &  3.75 &  4.0 \\
\bottomrule
\end{tabular}
"""

        assert result == expected

    def test_to_latex_multiindex_dupe_level(self):
        # see gh-14484
        #
        # If an index is repeated in subsequent rows, it should be
        # replaced with a blank in the created table. This should
        # ONLY happen if all higher order indices (to the left) are
        # equal too. In this test, 'c' has to be printed both times
        # because the higher order index 'A' != 'B'.
        df = pd.DataFrame(
            index=pd.MultiIndex.from_tuples([("A", "c"), ("B", "c")]), columns=["col"]
        )
        result = df.to_latex()
        expected = r"""\begin{tabular}{lll}
\toprule
  &   &  col \\
\midrule
A & c &  NaN \\
B & c &  NaN \\
\bottomrule
\end{tabular}
"""
        assert result == expected

    def test_to_latex_multicolumnrow(self):
        df = pd.DataFrame(
            {
                ("c1", 0): {x: x for x in range(5)},
                ("c1", 1): {x: x + 5 for x in range(5)},
                ("c2", 0): {x: x for x in range(5)},
                ("c2", 1): {x: x + 5 for x in range(5)},
                ("c3", 0): {x: x for x in range(5)},
            }
        )
        result = df.to_latex()
        expected = r"""\begin{tabular}{lrrrrr}
\toprule
{} & \multicolumn{2}{l}{c1} & \multicolumn{2}{l}{c2} & c3 \\
{} &  0 &  1 &  0 &  1 &  0 \\
\midrule
0 &  0 &  5 &  0 &  5 &  0 \\
1 &  1 &  6 &  1 &  6 &  1 \\
2 &  2 &  7 &  2 &  7 &  2 \\
3 &  3 &  8 &  3 &  8 &  3 \\
4 &  4 &  9 &  4 &  9 &  4 \\
\bottomrule
\end{tabular}
"""
        assert result == expected

        result = df.to_latex(multicolumn=False)
        expected = r"""\begin{tabular}{lrrrrr}
\toprule
{} & c1 &    & c2 &    & c3 \\
{} &  0 &  1 &  0 &  1 &  0 \\
\midrule
0 &  0 &  5 &  0 &  5 &  0 \\
1 &  1 &  6 &  1 &  6 &  1 \\
2 &  2 &  7 &  2 &  7 &  2 \\
3 &  3 &  8 &  3 &  8 &  3 \\
4 &  4 &  9 &  4 &  9 &  4 \\
\bottomrule
\end{tabular}
"""
        assert result == expected

        result = df.T.to_latex(multirow=True)
        expected = r"""\begin{tabular}{llrrrrr}
\toprule
   &   &  0 &  1 &  2 &  3 &  4 \\
\midrule
\multirow{2}{*}{c1} & 0 &  0 &  1 &  2 &  3 &  4 \\
   & 1 &  5 &  6 &  7 &  8 &  9 \\
\cline{1-7}
\multirow{2}{*}{c2} & 0 &  0 &  1 &  2 &  3 &  4 \\
   & 1 &  5 &  6 &  7 &  8 &  9 \\
\cline{1-7}
c3 & 0 &  0 &  1 &  2 &  3 &  4 \\
\bottomrule
\end{tabular}
"""
        assert result == expected

        df.index = df.T.index
        result = df.T.to_latex(multirow=True, multicolumn=True, multicolumn_format="c")
        expected = r"""\begin{tabular}{llrrrrr}
\toprule
   &   & \multicolumn{2}{c}{c1} & \multicolumn{2}{c}{c2} & c3 \\
   &   &  0 &  1 &  0 &  1 &  0 \\
\midrule
\multirow{2}{*}{c1} & 0 &  0 &  1 &  2 &  3 &  4 \\
   & 1 &  5 &  6 &  7 &  8 &  9 \\
\cline{1-7}
\multirow{2}{*}{c2} & 0 &  0 &  1 &  2 &  3 &  4 \\
   & 1 &  5 &  6 &  7 &  8 &  9 \\
\cline{1-7}
c3 & 0 &  0 &  1 &  2 &  3 &  4 \\
\bottomrule
\end{tabular}
"""
        assert result == expected

    def test_to_latex_escape(self):
        a = "a"
        b = "b"

        test_dict = {"co$e^x$": {a: "a", b: "b"}, "co^l1": {a: "a", b: "b"}}

        unescaped_result = DataFrame(test_dict).to_latex(escape=False)
        escaped_result = DataFrame(test_dict).to_latex()  # default: escape=True

        unescaped_expected = r"""\begin{tabular}{lll}
\toprule
{} & co$e^x$ & co^l1 \\
\midrule
a &       a &     a \\
b &       b &     b \\
\bottomrule
\end{tabular}
"""

        escaped_expected = r"""\begin{tabular}{lll}
\toprule
{} & co\$e\textasciicircum x\$ & co\textasciicircum l1 \\
\midrule
a &       a &     a \\
b &       b &     b \\
\bottomrule
\end{tabular}
"""

        assert unescaped_result == unescaped_expected
        assert escaped_result == escaped_expected

    def test_to_latex_special_escape(self):
        df = DataFrame([r"a\b\c", r"^a^b^c", r"~a~b~c"])

        escaped_result = df.to_latex()
        escaped_expected = r"""\begin{tabular}{ll}
\toprule
{} &       0 \\
\midrule
0 &   a\textbackslash b\textbackslash c \\
1 &  \textasciicircum a\textasciicircum b\textasciicircum c \\
2 &  \textasciitilde a\textasciitilde b\textasciitilde c \\
\bottomrule
\end{tabular}
"""
        assert escaped_result == escaped_expected

    def test_to_latex_longtable(self, float_frame):
        float_frame.to_latex(longtable=True)

        df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]})
        withindex_result = df.to_latex(longtable=True)
        withindex_expected = r"""\begin{longtable}{lrl}
\toprule
{} &  a &   b \\
\midrule
\endhead
\midrule
\multicolumn{3}{r}{{Continued on next page}} \\
\midrule
\endfoot

\bottomrule
\endlastfoot
0 &  1 &  b1 \\
1 &  2 &  b2 \\
\end{longtable}
"""
        assert withindex_result == withindex_expected

        withoutindex_result = df.to_latex(index=False, longtable=True)
        withoutindex_expected = r"""\begin{longtable}{rl}
\toprule
 a &   b \\
\midrule
\endhead
\midrule
\multicolumn{2}{r}{{Continued on next page}} \\
\midrule
\endfoot

\bottomrule
\endlastfoot
 1 &  b1 \\
 2 &  b2 \\
\end{longtable}
"""

        assert withoutindex_result == withoutindex_expected

        df = DataFrame({"a": [1, 2]})
        with1column_result = df.to_latex(index=False, longtable=True)
        assert r"\multicolumn{1}" in with1column_result

        df = DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]})
        with3columns_result = df.to_latex(index=False, longtable=True)
        assert r"\multicolumn{3}" in with3columns_result

    def test_to_latex_escape_special_chars(self):
        special_characters = ["&", "%", "$", "#", "_", "{", "}", "~", "^", "\\"]
        df = DataFrame(data=special_characters)
        observed = df.to_latex()
        expected = r"""\begin{tabular}{ll}
\toprule
{} &  0 \\
\midrule
0 &  \& \\
1 &  \% \\
2 &  \$ \\
3 &  \# \\
4 &  \_ \\
5 &  \{ \\
6 &  \} \\
7 &  \textasciitilde  \\
8 &  \textasciicircum  \\
9 &  \textbackslash  \\
\bottomrule
\end{tabular}
"""

        assert observed == expected

    def test_to_latex_no_header(self):
        # GH 7124
        df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]})
        withindex_result = df.to_latex(header=False)
        withindex_expected = r"""\begin{tabular}{lrl}
\toprule
0 &  1 &  b1 \\
1 &  2 &  b2 \\
\bottomrule
\end{tabular}
"""

        assert withindex_result == withindex_expected

        withoutindex_result = df.to_latex(index=False, header=False)
        withoutindex_expected = r"""\begin{tabular}{rl}
\toprule
 1 &  b1 \\
 2 &  b2 \\
\bottomrule
\end{tabular}
"""

        assert withoutindex_result == withoutindex_expected

    def test_to_latex_specified_header(self):
        # GH 7124
        df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]})
        withindex_result = df.to_latex(header=["AA", "BB"])
        withindex_expected = r"""\begin{tabular}{lrl}
\toprule
{} & AA &  BB \\
\midrule
0 &  1 &  b1 \\
1 &  2 &  b2 \\
\bottomrule
\end{tabular}
"""

        assert withindex_result == withindex_expected

        withoutindex_result = df.to_latex(header=["AA", "BB"], index=False)
        withoutindex_expected = r"""\begin{tabular}{rl}
\toprule
AA &  BB \\
\midrule
 1 &  b1 \\
 2 &  b2 \\
\bottomrule
\end{tabular}
"""

        assert withoutindex_result == withoutindex_expected

        withoutescape_result = df.to_latex(header=["$A$", "$B$"], escape=False)
        withoutescape_expected = r"""\begin{tabular}{lrl}
\toprule
{} & $A$ & $B$ \\
\midrule
0 &   1 &  b1 \\
1 &   2 &  b2 \\
\bottomrule
\end{tabular}
"""

        assert withoutescape_result == withoutescape_expected

        with pytest.raises(ValueError):
            df.to_latex(header=["A"])

    def test_to_latex_decimal(self, float_frame):
        # GH 12031
        float_frame.to_latex()

        df = DataFrame({"a": [1.0, 2.1], "b": ["b1", "b2"]})
        withindex_result = df.to_latex(decimal=",")

        withindex_expected = r"""\begin{tabular}{lrl}
\toprule
{} &    a &   b \\
\midrule
0 &  1,0 &  b1 \\
1 &  2,1 &  b2 \\
\bottomrule
\end{tabular}
"""

        assert withindex_result == withindex_expected

    def test_to_latex_series(self):
        s = Series(["a", "b", "c"])
        withindex_result = s.to_latex()
        withindex_expected = r"""\begin{tabular}{ll}
\toprule
{} &  0 \\
\midrule
0 &  a \\
1 &  b \\
2 &  c \\
\bottomrule
\end{tabular}
"""
        assert withindex_result == withindex_expected

    def test_to_latex_bold_rows(self):
        # GH 16707
        df = pd.DataFrame({"a": [1, 2], "b": ["b1", "b2"]})
        observed = df.to_latex(bold_rows=True)
        expected = r"""\begin{tabular}{lrl}
\toprule
{} &  a &   b \\
\midrule
\textbf{0} &  1 &  b1 \\
\textbf{1} &  2 &  b2 \\
\bottomrule
\end{tabular}
"""
        assert observed == expected

    def test_to_latex_no_bold_rows(self):
        # GH 16707
        df = pd.DataFrame({"a": [1, 2], "b": ["b1", "b2"]})
        observed = df.to_latex(bold_rows=False)
        expected = r"""\begin{tabular}{lrl}
\toprule
{} &  a &   b \\
\midrule
0 &  1 &  b1 \\
1 &  2 &  b2 \\
\bottomrule
\end{tabular}
"""
        assert observed == expected

    @pytest.mark.parametrize("name0", [None, "named0"])
    @pytest.mark.parametrize("name1", [None, "named1"])
    @pytest.mark.parametrize("axes", [[0], [1], [0, 1]])
    def test_to_latex_multiindex_names(self, name0, name1, axes):
        # GH 18667
        names = [name0, name1]
        mi = pd.MultiIndex.from_product([[1, 2], [3, 4]])
        df = pd.DataFrame(-1, index=mi.copy(), columns=mi.copy())
        for idx in axes:
            df.axes[idx].names = names

        idx_names = tuple(n or "{}" for n in names)
        idx_names_row = (
            "{idx_names[0]} & {idx_names[1]} &    &    &    &    \\\\\n".format(
                idx_names=idx_names
            )
            if (0 in axes and any(names))
            else ""
        )
        placeholder = "{}" if any(names) and 1 in axes else " "
        col_names = [n if (bool(n) and 1 in axes) else placeholder for n in names]
        observed = df.to_latex()
        expected = r"""\begin{tabular}{llrrrr}
\toprule
  & %s & \multicolumn{2}{l}{1} & \multicolumn{2}{l}{2} \\
  & %s &  3 &  4 &  3 &  4 \\
%s\midrule
1 & 3 & -1 & -1 & -1 & -1 \\
  & 4 & -1 & -1 & -1 & -1 \\
2 & 3 & -1 & -1 & -1 & -1 \\
  & 4 & -1 & -1 & -1 & -1 \\
\bottomrule
\end{tabular}
""" % tuple(
            list(col_names) + [idx_names_row]
        )
        assert observed == expected

    @pytest.mark.parametrize("one_row", [True, False])
    def test_to_latex_multiindex_nans(self, one_row):
        # GH 14249
        df = pd.DataFrame({"a": [None, 1], "b": [2, 3], "c": [4, 5]})
        if one_row:
            df = df.iloc[[0]]
        observed = df.set_index(["a", "b"]).to_latex()
        expected = r"""\begin{tabular}{llr}
\toprule
    &   &  c \\
a & b &    \\
\midrule
NaN & 2 &  4 \\
"""
        if not one_row:
            expected += r"""1.0 & 3 &  5 \\
"""
        expected += r"""\bottomrule
\end{tabular}
"""
        assert observed == expected

    def test_to_latex_non_string_index(self):
        # GH 19981
        observed = pd.DataFrame([[1, 2, 3]] * 2).set_index([0, 1]).to_latex()
        expected = r"""\begin{tabular}{llr}
\toprule
  &   &  2 \\
0 & 1 &    \\
\midrule
1 & 2 &  3 \\
  & 2 &  3 \\
\bottomrule
\end{tabular}
"""
        assert observed == expected

    def test_to_latex_midrule_location(self):
        # GH 18326
        df = pd.DataFrame({"a": [1, 2]})
        df.index.name = "foo"
        observed = df.to_latex(index_names=False)
        expected = r"""\begin{tabular}{lr}
\toprule
{} &  a \\
\midrule
0 &  1 \\
1 &  2 \\
\bottomrule
\end{tabular}
"""

        assert observed == expected

    def test_to_latex_multiindex_empty_name(self):
        # GH 18669
        mi = pd.MultiIndex.from_product([[1, 2]], names=[""])
        df = pd.DataFrame(-1, index=mi, columns=range(4))
        observed = df.to_latex()
        expected = r"""\begin{tabular}{lrrrr}
\toprule
  &  0 &  1 &  2 &  3 \\
{} &    &    &    &    \\
\midrule
1 & -1 & -1 & -1 & -1 \\
2 & -1 & -1 & -1 & -1 \\
\bottomrule
\end{tabular}
"""
        assert observed == expected

    def test_to_latex_float_format_no_fixed_width(self):

        # GH 21625
        df = DataFrame({"x": [0.19999]})
        expected = r"""\begin{tabular}{lr}
\toprule
{} &     x \\
\midrule
0 & 0.200 \\
\bottomrule
\end{tabular}
"""
        assert df.to_latex(float_format="%.3f") == expected

        # GH 22270
        df = DataFrame({"x": [100.0]})
        expected = r"""\begin{tabular}{lr}
\toprule
{} &   x \\
\midrule
0 & 100 \\
\bottomrule
\end{tabular}
"""
        assert df.to_latex(float_format="%.0f") == expected

    def test_to_latex_multindex_header(self):
        # GH 16718
        df = pd.DataFrame({"a": [0], "b": [1], "c": [2], "d": [3]}).set_index(
            ["a", "b"]
        )
        observed = df.to_latex(header=["r1", "r2"])
        expected = r"""\begin{tabular}{llrr}
\toprule
  &   & r1 & r2 \\
a & b &    &    \\
\midrule
0 & 1 &  2 &  3 \\
\bottomrule
\end{tabular}
"""
        assert observed == expected