Why Gemfury? Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Debian packages RPM packages NuGet packages

Repository URL to install this package:

Details    
PyDocX / tests / export / test_docx.py
Size: Mime:
from __future__ import (
    absolute_import,
    print_function,
    unicode_literals,
)

import base64
import os
from tempfile import NamedTemporaryFile

from nose.tools import raises

from pydocx.exceptions import MalformedDocxException
from pydocx.export.html import PyDocXHTMLExporter
from pydocx.test.testcases import BASE_HTML, DocXFixtureTestCaseFactory
from pydocx.test.utils import assert_html_equal
from pydocx.util.zip import ZipFile


def convert(path, *args, **kwargs):
    """
    Export the document at ``path`` with ``PyDocXHTMLExporter``.

    Any extra positional and keyword arguments are forwarded unchanged to the
    exporter's constructor. Returns whatever the exporter's ``export()``
    method returns.
    """
    return PyDocXHTMLExporter(path, *args, **kwargs).export()


class ConvertDocxToHtmlTestCase(DocXFixtureTestCaseFactory):
    """
    DOCX-to-HTML conversion tests driven by named fixtures.

    ``cases`` lists fixture names consumed by the base class.
    NOTE(review): presumably the base class turns each name into one test
    comparing the converted fixture with its expected HTML -- confirm against
    ``DocXFixtureTestCaseFactory``.

    The explicit ``test_*`` methods below cover scenarios that need custom
    assertions rather than a plain fixture comparison.
    """

    # Each fixture name is listed exactly once.
    cases = (
        'all_configured_styles',
        'export_from_googledocs',
        'external_image',
        'has_missing_image',
        'has_title',
        'inline_tags',
        'justification',
        'list_in_table',
        'lists_with_styles',
        'missing_numbering',
        'missing_style',
        'nested_lists',
        'nested_table_rowspan',
        'nested_tables',
        'no_break_hyphen',
        'read_same_image_multiple_times',
        'rotate_image',
        'shift_enter',
        'simple',
        'simple_lists',
        'simple_table',
        'special_chars',
        'styled_bolding',
        'styled_color',
        'table_col_row_span',
        'table_with_multi_rowspan',
        'tables_in_lists',
        'textbox',
        'track_changes_on',
    )

    @raises(MalformedDocxException)
    def test_raises_malformed_when_relationships_are_missing(self):
        # Converting this fixture must raise MalformedDocxException; the
        # ``raises`` decorator fails the test if it does not.
        docx_path = self.get_path_to_fixture('missing_relationships.docx')
        self.convert_docx_to_html(docx_path)

    def test_unicode(self):
        # The converted output must contain the Greek capital and small
        # alpha (U+0391, U+03B1) from the fixture.
        docx_path = self.get_path_to_fixture('greek_alphabet.docx')
        actual_html = self.convert_docx_to_html(docx_path)
        assert actual_html is not None
        assert '\u0391\u03b1' in actual_html

    def test_result_from_file_pointer_matches_result_from_path(self):
        # Converting from an open binary file object must give the same HTML
        # as converting from the path of that same file.
        path = self.get_path_to_fixture('simple.docx')
        path_html = self.convert_docx_to_html(path)
        # Use a context manager so the handle is closed even when the
        # conversion raises, instead of leaking it until garbage collection.
        with open(path, 'rb') as docx_file:
            file_html = self.convert_docx_to_html(docx_file)
        assert file_html
        self.assertEqual(path_html, file_html)


# Call the factory's generate() hook once, at import time.
# NOTE(review): presumably this attaches one test method per entry in
# ``cases``; generate() is defined outside this file -- confirm.
ConvertDocxToHtmlTestCase.generate()


def get_image_data(docx_file_path, image_name):
    """
    Look up ``word/media/<image_name>`` inside the archive at
    ``docx_file_path`` and return its contents base 64 encoded, as text.

    Raises ``AssertionError`` when the archive has no such member.
    """
    member_name = 'word/media/%s' % image_name
    with ZipFile(docx_file_path) as archive:
        # Take the first archive entry whose name matches, if there is one.
        entry = next(
            (info for info in archive.infolist()
             if info.filename == member_name),
            None,
        )
        if entry is None:
            raise AssertionError('%s not in %s' % (image_name, docx_file_path))
        raw_bytes = archive.read(entry.filename)
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode()


def test_has_image():
    # The fixture sits in the ``fixtures`` directory one level above the
    # directory holding this test module.
    tests_dir = os.path.abspath(os.path.dirname(__file__))
    file_path = os.path.join(tests_dir, '..', 'fixtures', 'has_image.docx')

    actual_html = convert(file_path)

    # The expected markup embeds the image as a base64 data URI, so read the
    # same image bytes from the archive to build the expectation.
    image_data = get_image_data(file_path, 'image1.gif')
    expected_body = '''
        <p>
            AAA
            <img
                height="55px"
                src="data:image/gif;base64,{data}"
                width="260px"
            />
        </p>
    '''.format(data=image_data)
    expected_html = BASE_HTML % expected_body

    assert_html_equal(actual_html, expected_html)


@raises(MalformedDocxException)
def test_malformed_docx_exception():
    # A freshly created temporary file is empty, so it cannot be a valid
    # docx archive; the ``raises`` decorator requires that converting it
    # raises MalformedDocxException.
    with NamedTemporaryFile(suffix='.docx') as empty_docx:
        convert(empty_docx.name)