Repository URL to install this package:
|
Version:
0.2.0a1 ▾
|
from re import Pattern
from django.db import models
from django.db.models import When, Case, Value
from remote_resources.raw_items.admin import RawItemAdmin
from remote_resources.raw_items.models import RawItemInterface
from remote_resources.raw_items.querysets import RawItemQuerySet
from webclipper.abstracts import WebClipInterface
from webclipper.admin import WebClipAdmin
from webclipper.utils import simplify_regex_named_groups
class WebClipRawItemQuerySet(RawItemQuerySet):
    """Queryset helpers for raw items that originate from web clips."""

    def _bulk_update_processed_item(self, processed_raw_items):
        """Bulk-save the ``_pi_content_type`` / ``_pi_object_id`` columns.

        Only those two columns are written for the given instances; the
        return value is whatever ``bulk_update`` returns.
        """
        # NOTE(review): these look like the two halves of a generic relation
        # to the processed item -- confirm against RawItemInterface.
        link_columns = ('_pi_content_type', '_pi_object_id')
        return self.bulk_update(processed_raw_items, link_columns)

    def annotate_is_processed(self):
        """Annotate each row with a boolean ``is_processed``.

        The flag is ``True`` when ``_pi_object_id`` is not NULL and ``False``
        otherwise.
        """
        has_processed_item = When(_pi_object_id__isnull=False, then=Value(True))
        is_processed_expr = Case(
            has_processed_item,
            default=Value(False),
            output_field=models.BooleanField()
        )
        return self.annotate(is_processed=is_processed_expr)

    def filter_relevant(self):
        """Keep only rows whose ``page_url`` matches the model's URL regex.

        The pattern comes from the model's ``_get_url_regex_for_db()`` and is
        applied through the ``__regex`` lookup.
        """
        db_pattern = self.model._get_url_regex_for_db()
        return self.filter(page_url__regex=db_pattern)
class WebClipRawItemManager(models.Manager):
    """Manager whose base queryset is already narrowed by ``filter_relevant()``.

    The queryset class can be overridden per model through a ``queryset_cls``
    attribute; ``WebClipRawItemQuerySet`` is used when the model defines none.
    """

    def _get_queryset_class(self):
        """Return the model's ``queryset_cls`` or the default queryset class."""
        return getattr(self.model, 'queryset_cls', WebClipRawItemQuerySet)

    def get_queryset(self):
        """Build the queryset for this manager and apply ``filter_relevant()``.

        The arguments are passed by keyword on purpose: the second positional
        parameter of Django's ``QuerySet.__init__`` is ``query``, not
        ``using``, so passing ``self._db`` positionally would hand a database
        alias to the ``query`` slot as soon as the manager is bound to a
        non-default database (e.g. via ``db_manager()``). Router hints are
        forwarded as well, matching Django's stock ``get_queryset``.
        """
        queryset_cls = self._get_queryset_class()
        queryset = queryset_cls(
            model=self.model,
            using=self._db,
            hints=self._hints,
        )
        return queryset.filter_relevant()
class AbstractWebClipRawItem(WebClipInterface, RawItemInterface, models.Model):
    """Abstract model for a raw item whose payload is a clipped web page.

    ``url_regex`` is only annotated here, not assigned; concrete subclasses
    are presumably expected to set it to a compiled pattern (TODO confirm).
    """

    # Compiled pattern describing the page URLs this item type accepts.
    url_regex: Pattern[str]

    # Manager whose querysets are pre-filtered by ``filter_relevant()``.
    objects = WebClipRawItemManager()

    class Meta:
        abstract = True

    @property
    def raw(self):
        """Raw payload of the item: the stored ``html_content``."""
        return self.html_content

    def is_valid(self):
        """Return ``True`` when ``page_url`` matches ``url_regex``.

        ``Pattern.match`` is used, so the match is anchored at the start of
        the URL.
        """
        url_match = self.url_regex.match(self.page_url)
        # Match objects are always truthy, so this equals bool(url_match).
        return url_match is not None

    @classmethod
    def _get_url_regex_for_db(cls):
        """Return the source of ``url_regex`` run through ``simplify_regex_named_groups``.

        NOTE(review): presumably this rewrites named groups into a form the
        database regex engine accepts -- confirm in ``webclipper.utils``.
        """
        pattern_source = cls.url_regex.pattern
        return simplify_regex_named_groups(pattern_source)
class WebClipRawItemAdmin(RawItemAdmin, WebClipAdmin):
    """Admin for web-clip raw items, combining ``RawItemAdmin`` and ``WebClipAdmin``.

    Subclasses may set ``scraped_fields`` to an iterable of field names; those
    fields are then shown read-only in an extra "Scraped" fieldset.
    """

    # Optional names of scraped fields; ``None`` disables the extra section.
    scraped_fields = None

    list_display = (*WebClipAdmin.list_display, 'is_processed')

    readonly_fields = (
        *RawItemAdmin.readonly_fields,
        *WebClipAdmin.readonly_fields,
    )

    # WebClipAdmin's layout with the fieldset at index 2 replaced by a
    # "Processing" section; every other fieldset is kept in place.
    fieldsets = (
        *WebClipAdmin.fieldsets[:2],
        ("Processing", {'fields': ('processed_item_display', 'is_processed')}),
        *WebClipAdmin.fieldsets[3:],
    )

    def get_readonly_fields(self, request, obj=None):
        """Return the inherited read-only fields plus any ``scraped_fields``.

        When ``scraped_fields`` is ``None`` the parent's value is returned
        untouched; otherwise a new tuple with the scraped names appended.
        """
        inherited = super().get_readonly_fields(request, obj)
        if self.scraped_fields is None:
            return inherited
        return (*inherited, *self.scraped_fields)

    def get_fieldsets(self, request, obj=None):
        """Return the inherited fieldsets with a "Scraped" section at index 2.

        The parent's fieldsets are returned unchanged when they are empty or
        when ``scraped_fields`` is ``None``.
        """
        inherited = super().get_fieldsets(request, obj)
        if not inherited or self.scraped_fields is None:
            return inherited
        scraped_section = ("Scraped", {'fields': self.scraped_fields})
        return (*inherited[:2], scraped_section, *inherited[2:])
# Public names exported by this module, listed in definition order.
__all__ = (
    'WebClipRawItemQuerySet', 'WebClipRawItemManager',  # ORM layer
    'AbstractWebClipRawItem',  # abstract model
    'WebClipRawItemAdmin',  # admin integration
)