"""
This module implements the following classes
* :py:class:`eezz.document.TManifest`: The Manifest contains the attributes and the content of a document.\
This includes for example author, creation date and embedded external files.
* :py:class:`eezz.document.TDocument`: A document consists of one or more embedded files and the Manifest.\
The class implements methods for file download and creating a TAR archive.
A document always has a reference to a shelf, which contains documents with the same Manifest layout
"""
import re
import time
import tarfile
import json
from loguru import logger
from abc import abstractmethod
from io import BytesIO
from eezz.filesrv import TFile, TFileMode
from service import TService
from pathlib import Path
from dataclasses import dataclass
from math import floor
from typing import List, Dict, override
@dataclass(kw_only=True)
class TManifest:
""" The manifest represents the information for the document. It defines a solid way to ensures a consistent
structure for parsing the internal attributes.
"""
keys_section_header: list
keys_section_doc: list = None
keys_section_files: list = None
structure_document: dict = None
map_files: dict = None
def __post_init__(self):
# Prepare consistent access of manifest to database
self.keys_section_doc = ['document', 'files', 'signature']
self.keys_section_files = ['source', 'name', 'size', 'type']
self.structure_document = {'document': {}, 'files': []}
@property
def document(self) -> dict:
""":meta private:"""
return self.structure_document['document']
@document.setter
def document(self, value: dict):
""":meta private:"""
self.map_files = dict()
x_document_descr = {x: value.get(x, '') for x in self.keys_section_header}
self.structure_document['document'] = x_document_descr
@property
def files(self) -> list:
""":meta private:"""
return self.structure_document['files']
def append_file(self, file: dict):
""":meta private:"""
if not self.map_files.get(file['source']):
self.map_files[file['source']] = list()
x_file_descr = {x: file.get(x, '') for x in self.keys_section_files}
self.structure_document['files'].append(x_file_descr)
self.map_files[file['source']].append(file['name'])
@property
def column_names(self) -> list:
""":meta private:"""
return self.keys_section_header
@override
def __str__(self):
for x, y in self.map_files.items():
self.structure_document['document'][x] = y
return json.dumps(self.structure_document, indent=4)
def loads(self, manifest_str):
""":meta private:"""
self.structure_document = json.loads(manifest_str)
[docs]
@dataclass(kw_only=True)
class TDocument:
""" Manages documents
A document is a zipped TAR file, w ith a Metafile and a collection of data files.
:ivar Path path: Documents bookshelf path
:ivar List[str] attributes: List of attributes like author and title
:ivar str shelf_name: A document has always a reference to a bookshelf
:ivar TManifest manifest: Document header definition
:ivar List[TFile] files_list: List of embedded files
"""
shelf_name: str #: :meta private:
attributes: List[str] #: List of document attributes like author and title
manifest: TManifest = None #: :meta private:
path: Path = None #: :meta private:
count: int = 0 #: :meta private:
finished: bool = False #: :meta private:
file_sources: List[str] = None #: :meta private:
files_transferred: int = 0
map_files: Dict[str, TFile] = None #: :meta private:
map_source: Dict[str, List[TFile]] = None #: :meta private:
transferred: int = 0 #: :meta private:
title: str = '' #: :meta private:
def __post_init__(self):
""" combine attributes:
The mandatory attribute "title" is inserted at the start,
the file sources at the end
The file sources might have one or more references, which are represented as list of files in the Manif<est
"""
self.attributes = [x for x in self.attributes]
if self.file_sources:
self.attributes += self.file_sources
if not self.path:
self.path = TService().document_path
self.manifest = TManifest(keys_section_header=self.attributes)
[docs]
def initialize_document(self, values: list) -> None:
""" Initializes the document, providing values for the Manifest header
:param List[str] values: List of values according to the definition of columns
"""
self.finished = False
self.count = 0
self.manifest.document = {x: y for x, y in zip(self.attributes, values)}
self.map_source = dict()
self.map_files = dict()
self.transferred = 0
self.title = values[0]
[docs]
def download_file(self, file: dict, stream: bytes = b'') -> bytes:
""" Download file callback.
The method is called for each chunk and for a final acknowledge. If all files
are transferred, the document is created.
For each final acknowledge a 100% is returned.
:param dict file: File descriptor with details on file and byte stream
:param bytearray stream: File stream, usually a chunk of
:return: The percentage of transferred document size as bytearray, terminated with the percent sign
:rtype: bytearray
"""
if file['opcode'] == 'finished':
# Check if we got all elements of a single source:
if self.transferred == file['all_volume']:
self.create_document()
self.finished = True
return '100%'.encode('utf8')
x_fraction = 100 * len(self.map_source[file['source']]) / int(file['src_files'])
return f'{x_fraction}%'.encode('utf8')
# Manage the file sources: Each source may have many entries
# Drag hte number of successful loaded elements of this specific source
if not self.map_source.get(file['source']):
self.map_source[file['source']] = list()
if not self.map_files.get(file['name']):
x_path = TService().public_path / self.title
x_path.mkdir(exist_ok=True)
x_path /= file['name']
xt_file = TFile(file_type=file['source'], size = file['size'], chunk_size = file['chunk_size'], destination = x_path)
self.map_files[file['name']] = xt_file
self.map_source[file['source']].append(xt_file)
self.manifest.append_file(file)
xt_file = self.map_files[file['name']]
xt_file.write(stream, file['sequence'], mode = TFileMode.NORMAL)
# return percentage for a specific source
self.transferred += xt_file.transferred
x_src_volume = 0
for xt_f in self.map_source[file['source']]:
x_src_volume += xt_f.transferred
x_percent = 100 * x_src_volume / int(file['src_volume'])
return f'{x_percent}%'.encode('utf8')
[docs]
@abstractmethod
def create_document(self):
""" Abstract method which is called after all files are in place """
pass
[docs]
def create_archive(self, document_title: str) -> None:
""" ZIP the given files and the manifest to a document.
The TFile class keeps track on the location of the file content and their properties.
:param str document_title: The name of the archive
"""
x_zip_stream = BytesIO()
x_zip_stream.write(str(self.manifest).encode('utf-8'))
x_zip_root = Path('.')
# Path is: destination / book / document
x_destination = self.path / f'{self.shelf_name}/{document_title}.tar'
x_destination.parent.mkdir(exist_ok=True)
with tarfile.TarFile(x_destination, "w") as x_zip_file:
# store the info at the start of the tar file
x_entry_path = Path(x_zip_root) / 'Manifest'
x_tar_info = tarfile.TarInfo(name=str(x_entry_path))
x_tar_info.size = x_zip_stream.tell()
x_tar_info.mtime = floor(time.time())
x_zip_stream.seek(0)
x_zip_file.addfile(tarinfo=x_tar_info, fileobj=x_zip_stream)
# Sort the files by source for output zip:
for x_name, x_file in self.map_files.items():
x_entry_path = Path(x_zip_root) / x_file.destination.name
x_source_path = x_file.destination
x_stat_info = x_source_path.stat()
x_tar_info = tarfile.TarInfo(name=str(x_entry_path))
x_tar_info.size = x_stat_info.st_size
x_tar_info.mtime = floor(time.time())
with x_source_path.open("rb") as x_input:
x_zip_file.addfile(tarinfo=x_tar_info, fileobj=x_input)
[docs]
def read_file(self, document_title: str, file_name: str) -> bytes:
""" Returns the bytestream of the specified file in the archive
:param str document_title: The title of the document is the name of the archive
:param str file_name: The file content to return
"""
x_source = self.path / f'{self.shelf_name}/{document_title}.tar'
with tarfile.TarFile(x_source, "r") as x_zip_file:
for x_tar_info in x_zip_file.getmembers():
x_dest = Path(x_tar_info.name)
if x_dest.name == file_name:
if x_buffer := x_zip_file.extractfile(x_tar_info):
return x_buffer.read()
# -- section for module tests
def test_document():
    """:meta private:

    Module test: creates a document with five attributes and one file source.
    """
    logger.debug('Test class Document')
    x_document = TDocument(
        shelf_name='First',
        attributes=['title', 'desc', 'author', 'price', 'valid'],
        file_sources=['main'],
    )
    logger.success('test document')
if __name__ == '__main__':
    # :meta private:
    # Executed as script: set up the service environment and run the module test
    x_root_path = '/Users/alzer/Projects/github/eezz_full/webroot'
    TService.set_environment(root_path=x_root_path)
    test_document()