rtx/rtxPage.py

# -*- coding: UTF-8 -*-
'''
rtx - RetroText
rtxPage: class for working with a CEPT page
by Anna Christina Naß <acn@acn.wtf>
released under GPL
'''

import os
import re
import config
import logging

class rtxPage:
    """Class for working with a CEPT page.

    A page is the raw content of a file located in one of the directories
    listed in ``config.PAGES``, plus the link table parsed out of that
    content.  An instance created without a page name stays empty until
    ``set_page()`` is called.
    """

    # Class-level defaults, kept for backward compatibility with code that
    # reads them from the class.  __init__ gives every instance its own values.
    die_seite = ""      # raw page data; "" while nothing has been loaded
    seiten_nummer = -1  # name the page was loaded under; -1 while unloaded
    link_liste = {}     # maps link key -> link target

    def __init__(self, page=None):
        """Create a page object, optionally loading the page named 'page'.

        A constructor cannot report success; after construction use
        ``get_page()`` (returns False when nothing was loaded) to find out
        whether the page could be loaded.
        """
        # Always bind per-instance state so that instances never share the
        # mutable class-level dictionary.
        self.die_seite = ""
        self.seiten_nummer = -1
        self.link_liste = {}
        if page is not None:
            self._load_page(page)

    @staticmethod
    def _find_file(page, paths=None):
        """Return the file name for 'page', or None if no file matches.

        'paths' is the list of directory prefixes to search and defaults to
        ``config.PAGES``.  Each prefix is concatenated with the candidate
        name as-is (no separator is inserted).  Per directory the candidates
        are tried in this order: the exact name, the name with ".cept"
        appended, the lowercase name, the lowercase name with ".cept".
        """
        if paths is None:
            paths = config.PAGES
        lowered = page.lower()
        candidates = (page, page + ".cept", lowered, lowered + ".cept")
        for path in paths:
            for name in candidates:
                if os.path.isfile(path + name):
                    return path + name
        return None

    @staticmethod
    def exists(page):
        """Return True if a file for 'page' exists, otherwise False.

        Uses the same lookup as the loader, so a page that can be loaded is
        always reported as existing (including lowercase file names).
        """
        return rtxPage._find_file(page) is not None

    def get_page(self):
        """Return the page data, or False if no page has been loaded."""
        # "" is the "nothing loaded" marker; loaded data is normally bytes
        # and therefore never equal to it.
        if self.die_seite != "":
            return self.die_seite
        return False

    def set_page(self, pagedata):
        """Store 'pagedata' as the new page and rebuild the link list."""
        self.die_seite = pagedata
        self.link_liste = self._get_link_list()

    def get_links(self):
        """Return the dictionary of links found in this page."""
        return self.link_liste

    def get_link(self, link):
        """Return the target for 'link', or False if there is no such link."""
        if link in self.link_liste:
            return self.link_liste[link]
        return False

    def get_page_id(self):
        """Return the id of the current page (-1 if none was loaded)."""
        return self.seiten_nummer

    def _load_page(self, page):
        """Load a CEPT page from the file system.

        Returns True if the page has been loaded, False if no matching file
        was found in any of the configured directories.
        """
        filename = self._find_file(page)
        if filename is None:
            # The page does not exist anywhere; leave the object untouched.
            return False

        with open(filename, "rb") as f:
            self.die_seite = f.read()
        self.seiten_nummer = page
        self.link_liste = self._get_link_list()
        return True

    def _get_link_list(self):
        """Parse the current page data and return its links as a dictionary.

        Every link entry starts with the bytes 0x1f 0x3d and runs up to the
        next 0x1f, 0x9b or 0x1b.  Within an entry the first character is a
        marker (entries whose marker is "0" are skipped), the next two
        characters are the link key and the remainder is the target; key and
        target are stripped of surrounding whitespace.  An empty dictionary
        is returned when the page contains no links.
        """
        data = self.die_seite
        if not isinstance(data, str):
            # Page files are read as bytes; latin-1 maps every byte to the
            # code point of the same value, so the control bytes survive.
            data = str(data, "latin-1")
        links = re.findall("\x1f\x3d([^\x1f\x9b\x1b]+)", data)

        liste = {
            item[1:3].strip(): item[3:].strip()
            for item in links
            if item[0] != "0"
        }

        logging.info(liste)

        return liste
Initial commit 2016-05-23 16:01:04 +02:00			`# -- coding: UTF-8 --`
			`'''`
			`rtx - RetroText`
			`rtxPwage: Klasse zur Benutzung einer CEPT-Seite`
			`by Anna Christina Naß <acn@acn.wtf>`
			`released under GPL`
			`'''`

			`import os`
			`import re`
			`import config`
			`import logging`

			`class rtxPage:`
changed tabs to 4 spaces + some minor fixes in rtxHelpers 2016-06-02 16:49:43 +02:00			`""" Klasse zur Benutzung einer CEPT-Seite`
			`"""`

			`die_seite = ""`
			`seiten_nummer = -1`
			`link_liste = {}`

			`def __init__(self, page = None):`
			`""" create a new page object for the page 'page'`
			`returns False if the page cannot be found`
			`returns True if the page has been loaded`
			`"""`
			`if page == None:`
			`return`
			`else:`
			`self.die_seite = ""`
			`self.seiten_nummer = -1`
			`self.link_liste = {}`
			`self._load_page(page)`

			`@staticmethod`
			`def exists(page):`
			`""" checks if the page (i.e. the file) exists """`
Mehrere Verzeichnisse für Seiten ermöglicht 2016-10-31 16:40:10 +01:00			`for path in config.PAGES:`
Dateierweiterung .cept wird jetzt auch akzeptiert 2018-06-01 15:00:57 +02:00			`if os.path.isfile(path + page) or os.path.isfile(path + page + ".cept"):`
Typo 2016-10-31 16:42:16 +01:00			`return True`
Mehrere Verzeichnisse für Seiten ermöglicht 2016-10-31 16:40:10 +01:00			`return False`
changed tabs to 4 spaces + some minor fixes in rtxHelpers 2016-06-02 16:49:43 +02:00
			`def get_page(self):`
			`""" returns the page if loaded`
			`returns False if no page has been loaded`
			`"""`
			`if self.die_seite != "":`
			`return self.die_seite`
			`else:`
			`return False`

			`def set_page(self,pagedata):`
			`""" write the pagedata as the new page and (probably) the link list """`
			`self.die_seite = pagedata`
			`self.link_liste = self._get_link_list()`

			`def get_links(self):`
			`""" returns the list of links found in this page """`
			`return self.link_liste`

			`def get_link(self, link):`
			`""" returns the target for a given link`
			`returns False if this link does not exist`
			`"""`
			`if link in self.link_liste:`
			`return self.link_liste[link]`
			`else:`
			`return False`

			`def get_page_id(self):`
			`""" returns the id of the current page """`
			`return self.seiten_nummer`

			`def _load_page(self, page):`
			`""" Load a CEPT page from the file system`
			`returns False if the page cannot be found`
			`returns True if the page has been loaded`
			`"""`
Mehrere Verzeichnisse für Seiten ermöglicht 2016-10-31 16:40:10 +01:00			`for path in config.PAGES:`
			`if os.path.isfile(path + page):`
			`filename = path + page`
			`""" page has been found, now exit the for loop """`
			`break`
Dateierweiterung .cept wird jetzt auch akzeptiert 2018-06-01 15:00:57 +02:00			`if os.path.isfile(path + page + ".cept"):`
			`filename = path + page + ".cept"`
			`""" page has been found - with .cept extension """`
			`break`
Seiten auch finden, wenn Dateiname lowercase ist (z.B. 330331A -> 330331a) 2018-06-30 11:12:37 +02:00			`if os.path.isfile(path + page.lower()):`
			`filename = path + page.lower()`
			`""" page has been found with lowercase filename """`
			`break`
			`if os.path.isfile(path + page.lower() + ".cept"):`
			`filename = path + page.lower() + ".cept"`
			`""" page has been found with lowercase filename and .cept extension """`
			`break`
changed tabs to 4 spaces + some minor fixes in rtxHelpers 2016-06-02 16:49:43 +02:00			`else:`
Mehrere Verzeichnisse für Seiten ermöglicht 2016-10-31 16:40:10 +01:00			`""" page does not exist anywhere, so exit the function """`
changed tabs to 4 spaces + some minor fixes in rtxHelpers 2016-06-02 16:49:43 +02:00			`return False`

			`with open(filename, "rb") as f:`
			`self.die_seite = f.read()`
			`self.seiten_nummer = page`
bug fixes und weiter an der Maske (unfertig) 2016-06-10 14:48:28 +02:00			`self.link_liste = self._get_link_list()`
changed tabs to 4 spaces + some minor fixes in rtxHelpers 2016-06-02 16:49:43 +02:00			`return True`

			`def _get_link_list(self):`
			`""" private function which parses the CEPT page and extracts the link list`
			`returns True if links have been found`
			`returns False if no links have been found`
			`"""`
			`links = []`
			`links = re.findall("\x1f\x3d([^\x1f\x9b\x1b]+)", str(self.die_seite, "latin-1"))`
bug fixes und weiter an der Maske (unfertig) 2016-06-10 14:48:28 +02:00			`liste = {}`
changed tabs to 4 spaces + some minor fixes in rtxHelpers 2016-06-02 16:49:43 +02:00
			`for item in links:`
			`if item[0] != "0":`
			`link = item[1:3].strip()`
			`target = item[3:].strip()`
bug fixes und weiter an der Maske (unfertig) 2016-06-10 14:48:28 +02:00			`liste[link] = target`
changed tabs to 4 spaces + some minor fixes in rtxHelpers 2016-06-02 16:49:43 +02:00
bug fixes und weiter an der Maske (unfertig) 2016-06-10 14:48:28 +02:00			`logging.info(liste)`
Initial commit 2016-05-23 16:01:04 +02:00
bug fixes und weiter an der Maske (unfertig) 2016-06-10 14:48:28 +02:00			`return liste`