Why Gemfury? Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Debian packages RPM packages NuGet packages

Repository URL to install this package:

Details    
Size: Mime:
#!/usr/bin/python
# coding=utf-8
#
# Copyright (C) 2018-2025 by dream-alpha
#
# In case of reuse of this source code please do not remove this copyright.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# For more information on the GNU General Public License see:
# <http://www.gnu.org/licenses/>.


import re
import json
from .Debug import logger


# Category mapping
# Maps the listing site's short category codes to human-readable
# German category names.
# NOTE(review): 'AND' → 'Nachrichten' looks like a catch-all code
# (possibly "Andere") — confirm against the site's actual codes.
category = {
    'SP': 'Spielfilm',      # feature film
    'SE': 'Serie',          # series
    'RE': 'Report',         # report/documentary
    'U': 'Unterhaltung',    # entertainment
    'KIN': 'Kinder',        # children
    'SPO': 'Sport',         # sports
    'AND': 'Nachrichten'    # news
}

# Regex patterns for base page parsing.
# All patterns are compiled with re.S (DOTALL) so '.' also matches
# newlines across the multi-line HTML.

# Outer envelope of the listing table: first hover row through the
# closing </table></div>.
html_data_val_com = re.compile('<tr class="hover">.+?class="editorial-rating.+?</table>.+?</div>', re.S)
# One listing row (one broadcast) inside the envelope.
masterval_com = re.compile('<tr class="hover">.+?class="editorial-rating.+?"></span></td>', re.S)
# Relative start/end timestamps carried in data attributes.
data_rel_start_com = re.compile(r'data-rel-start="(.+?)"', re.S)
data_rel_end_com = re.compile(r'data-rel-end="(.+?)"', re.S)
# Pulls url/full_title/data_tracking_point/title/info out of one row.
data_tracking_point_com = re.compile(
    r'<span>'  # Opening span tag
    # URL of the program (urlsendung)
    r'.+?<a href="(?P<url>http.+?html)"'
    r'.+?saveRef.+?'  # saveRef function call
    # Title attribute containing full title + subtitle
    r'title="(?P<full_title>.+?)"'
    # Extract raw JSON from data-tracking-point
    r'.+?data-tracking-point=\'(?P<data_tracking_point>.*?)\''
    r'.+?<strong>(?P<title>.+?)</strong>'  # The display title
    r'.+?</a>(?P<info>.*?)</span>', re.S  # Additional info after title
)

# Regex patterns for details page parsing.
# Two variants of the content-area section; the first expects an
# inline image section, the second an <aside> sidebar.
sectionidcontent_con_com = re.compile(r'<div class="content-area">.+?="inline-section_images"', re.S)
sectionidcontent_alt_com = re.compile(r'<div class="content-area">(.*?)<aside class="aside">', re.S)
# Video description + key used to build the media URL.
xymatic_video_com = re.compile(r'"xymatic-video".+?"contentDesc": "(.+?)".+?http.+?key=(.+?)"', re.S)
# Open Graph preview image URL.
og_image_com = re.compile(r'og:image" content="(.+?)" />', re.S)
# Long-description section and its individual <p> paragraphs.
descriptionre_com = re.compile(r'<section class="broadcast-detail__description">(.*?)</section>', re.S)
descriptionreg_com = re.compile(r'<p>(.*?)</p>', re.S)
# Episode subtitle header on the details page.
episode_title_com = re.compile(r'<h2 class="broadcast-info">(.*?)</h2>', re.S)
# Title attribute in the live-TV listing variant.
title_attr_com = re.compile(r'<div class="programm-listing-livetv">.*?title="([^"]+)"', re.S)


def tvs_parse(html_data):
    """Parse the base TV program listing page.

    Extracts one event dict per listing row found in the HTML of the
    overview page.

    Args:
        html_data: raw HTML of the listing page (str).

    Returns:
        list of event dicts with keys: urlsendung, title, subtitle,
        year, country, category, genre, startTime, endTime, duration,
        channel — plus has_video when tracking data is present.
        Empty list when the listing envelope is not found.
    """
    result = []
    html_data_val = html_data_val_com.search(html_data)

    if not html_data_val:
        return result

    masterval = masterval_com.findall(html_data_val.group())

    for master_item in masterval:
        logger.debug("master_item: %s", master_item)
        event = {
            'urlsendung': "",
            'title': "",
            'subtitle': "",
            'year': "",
            'country': "",
            'category': "",
            'genre': "",
            'startTime': 0,
            'endTime': 0,
            'duration': 0,
            'channel': ''
        }

        # BUGFIX: the original called search(master_item, re.S); on a
        # compiled pattern the second argument is the start *position*,
        # not a flag, so matching silently began at offset 16 (the
        # integer value of re.S). Flags are already set at compile time.
        tmp = data_tracking_point_com.search(master_item)
        if tmp:

            logger.debug("***********************************")
            for name in tmp.groupdict():
                logger.debug("group %s: %s", name, tmp.group(name))

            event['urlsendung'] = tmp.group('url')
            display_title = tmp.group('title')
            event['title'] = display_title

            # The title attribute holds "<display title><subtitle>, ..." —
            # strip the display-title prefix to isolate the subtitle,
            # keeping only the part before the first comma.
            full_title = tmp.group('full_title')
            if full_title.startswith(display_title):
                subtitle = full_title[len(display_title):].strip()
                if subtitle:
                    subtitle = subtitle.split(",")[0].strip()
                    event["subtitle"] = subtitle

            logger.debug("full_title: %s", full_title)
            full_title_parts = full_title.split(",")
            # (dead reassignment of full_title from the original removed —
            # the value was never used afterwards)

            # Country/year usually sits in the third comma-separated
            # part; fall back to the second when there are fewer parts.
            country_year = ""
            if len(full_title_parts) > 2:
                country_year = full_title_parts[2].strip()
            elif len(full_title_parts) > 1:
                country_year = full_title_parts[1].strip()
            event['year'] = country_year

            if tmp.group('data_tracking_point'):
                # Scraped attribute data may be malformed — keep the
                # event defaults instead of crashing the whole parse.
                try:
                    tracking_data = json.loads(tmp.group('data_tracking_point'))
                except ValueError:
                    logger.debug("invalid tracking JSON: %s", tmp.group('data_tracking_point'))
                    tracking_data = {}
                logger.debug("tracking_data: %s", tracking_data)

                # Extract values directly from the parsed data
                event['has_video'] = bool(
                    int(tracking_data.get('videoIntegration', 0)))
                event['genre'] = tracking_data.get('genre', '')
                event['category'] = tracking_data.get('category1', '')
                event['channel'] = tracking_data.get('channel', '')

        # Relative start/end timestamps (same re.S-as-position bugfix).
        endTime = startTime = 0
        tmp = data_rel_start_com.search(master_item)
        if tmp:
            startTime = int(tmp.group(1))
            event['startTime'] = startTime

        tmp = data_rel_end_com.search(master_item)
        if tmp:
            endTime = int(tmp.group(1))
            event['endTime'] = endTime

        # Duration in minutes derived from the second-based timestamps.
        event['duration'] = (endTime - startTime) / 60

        result.append(event)

        for key, value in event.items():
            logger.debug("event %s: %s", key, value)
    return result


def tvs_parse_details(html_data, event):
    """Parse the details page for a specific TV program.

    Fills in the keys video_url, photo_url, longDescription and
    (when present) subtitle on the given event dict.

    Args:
        html_data: raw HTML of the details page (str).
        event: event dict produced by tvs_parse(), mutated in place.

    Returns:
        The same event dict, for convenience.
    """
    sectionidcontent_con_val = sectionidcontent_con_com.search(html_data)
    logger.debug("sectionidcontent_con_val 1: %s", sectionidcontent_con_val)
    if not sectionidcontent_con_val:
        # Fall back to the <aside>-based page layout.
        sectionidcontent_con_val = sectionidcontent_alt_com.search(html_data)
    logger.debug("sectionidcontent_con_val 2: %s", sectionidcontent_con_val)

    # Extract video URL
    event['video_url'] = ""
    # BUGFIX: the original called .group() unconditionally, raising
    # AttributeError whenever neither content-section pattern matched.
    if sectionidcontent_con_val:
        xymatic_video_val = xymatic_video_com.search(
            sectionidcontent_con_val.group())
        if xymatic_video_val:
            event['video_url'] = "https://media.delight.video/{0}/{1}/MEDIA/v0/HD/media.mp4".format(
                xymatic_video_val.group(2).strip(),
                xymatic_video_val.group(1).strip()
            )

    # Extract image (Open Graph preview)
    event['photo_url'] = ""
    og_image_val = og_image_com.search(html_data)
    if og_image_val:
        event['photo_url'] = og_image_val.group(1).strip()

    # Extract description: concatenate all non-empty <p> paragraphs,
    # separated by blank lines.
    event['longDescription'] = ""
    descriptionre_val = descriptionre_com.search(html_data)
    if descriptionre_val:
        # Extract paragraphs directly
        descriptionreg_val = descriptionreg_com.findall(
            descriptionre_val.group(1))
        if descriptionreg_val:
            for desval in descriptionreg_val:
                if desval:
                    event['longDescription'] += desval + "\n\n"

    # Extract episode subtitle/title from the broadcast-info header;
    # headers starting with "Mehr" are navigation links, not subtitles.
    episode_title_val = episode_title_com.search(html_data)
    if episode_title_val:
        subtitle_text = episode_title_val.group(1)
        if not subtitle_text.startswith("Mehr"):
            event['subtitle'] = subtitle_text

    return event