# -*- coding: UTF-8 -*-
'''
rtx - RetroText
rtxPage: class for working with a CEPT page

by Anna Christina Naß
released under GPL
'''

import logging
import os
import re

import config

# A link entry starts with the two characters 0x1F 0x3D and runs up to (not
# including) the next 0x1F, 0x9B or 0x1B character.  Compiled once because
# the pattern never changes.
_LINK_PATTERN = re.compile("\x1f\x3d([^\x1f\x9b\x1b]+)")


class rtxPage:
    """Holds the raw data of one CEPT page and the links parsed from it."""

    # Class-level defaults, kept for backward compatibility.  Every instance
    # gets its own copies in __init__, so the mutable dict below is never
    # shared between pages.
    die_seite = ""
    seiten_nummer = -1
    link_liste = {}

    def __init__(self, page=None):
        """
        Create a new page object.

        page -- name of the page file below config.PAGES; when None (the
                default) an empty page object is created and nothing is
                loaded.

        A constructor cannot report success; use get_page() afterwards
        (it returns False when nothing was loaded) or check
        rtxPage.exists(page) beforehand.
        """
        # Always create per-instance state, even for an empty page, so that
        # later parsing never writes into the class-level dict.
        self.die_seite = ""
        self.seiten_nummer = -1
        self.link_liste = {}
        if page is not None:
            self._load_page(page)

    @staticmethod
    def exists(page):
        """
        Return True if the file for 'page' exists below config.PAGES.
        """
        return os.path.isfile(config.PAGES + page)

    def get_page(self):
        """
        Return the page data, or False if no page has been loaded.
        """
        if self.die_seite != "":
            return self.die_seite
        return False

    def set_page(self, pagedata):
        """
        Replace the page data with 'pagedata' and re-parse the link list.

        pagedata -- bytes-like page data, or text that is already decoded.
        """
        self.die_seite = pagedata
        # _get_link_list() stores the parsed links itself and only returns a
        # success flag; that flag must not overwrite the link dict.
        self._get_link_list()

    def get_links(self):
        """
        Return the dict of links found in this page (link -> target).
        """
        return self.link_liste

    def get_link(self, link):
        """
        Return the target for the given link, or False if the link does
        not exist.
        """
        return self.link_liste.get(link, False)

    def get_page_id(self):
        """
        Return the id of the current page (-1 if no page was loaded).
        """
        return self.seiten_nummer

    def _load_page(self, page):
        """
        Load a CEPT page from the file system.

        Returns True if the page has been loaded, False if the file
        cannot be found.
        """
        filename = config.PAGES + page
        if not os.path.isfile(filename):
            return False
        # Binary mode: the page is kept as raw bytes and only decoded
        # temporarily while the links are parsed.
        with open(filename, "rb") as f:
            self.die_seite = f.read()
        self.seiten_nummer = page
        self._get_link_list()
        return True

    def _get_link_list(self):
        """
        Parse the current page data and rebuild the link list from it.

        The result is stored in self.link_liste as a fresh dict, so links
        of a previously parsed page do not leak into the new one.

        Returns True if links have been found, False otherwise.
        """
        data = self.die_seite
        # Bytes-like data is decoded as latin-1, which maps every byte to
        # the code point of the same value; text is used as it is.
        text = data if isinstance(data, str) else str(data, "latin-1")

        found = {}
        for item in _LINK_PATTERN.findall(text):
            # Entries whose first character is "0" are skipped.
            if item[0] != "0":
                # Characters 1-2 hold the link, the remainder the target.
                found[item[1:3].strip()] = item[3:].strip()

        self.link_liste = found
        logging.info(self.link_liste)
        return bool(found)