# -*- coding: UTF-8 -*-
'''
rtx - RetroText
rtxPage: class for working with a CEPT page

by Anna Christina Naß
released under GPL
'''

import os
import re
import config
import logging


class rtxPage:
    """
    Class for working with a CEPT page.

    A page is looked up as a file below each prefix listed in
    ``config.PAGES`` (either under its plain name or with a ``.cept``
    extension), read as raw bytes and scanned for link definitions.
    """

    # Class-level defaults, kept for backward compatibility with code that
    # reads them from the class. Every instance gets its own copies in
    # __init__, so the mutable dict below is never shared between instances.
    die_seite = ""
    seiten_nummer = -1
    link_liste = {}

    # Matches the two characters 0x1F 0x3D followed by a run of characters
    # that are none of 0x1F, 0x9B, 0x1B; the run is captured as one entry.
    # NOTE(review): presumably this is the CEPT sequence that defines a
    # link - confirm against the CEPT specification.
    _LINK_PATTERN = re.compile("\x1f\x3d([^\x1f\x9b\x1b]+)")

    def __init__(self, page=None):
        """
        Create a new page object, optionally loading the page 'page'.

        If 'page' is None an empty page object is created. Otherwise the
        page is loaded via _load_page(); when it cannot be found the
        object simply stays empty (get_page() then returns False and
        get_page_id() returns -1).
        """
        # Always start from per-instance state so that no instance ever
        # falls back to the shared class-level dict.
        self.die_seite = ""
        self.seiten_nummer = -1
        self.link_liste = {}
        if page is not None:
            self._load_page(page)

    @staticmethod
    def _find_file(page):
        """
        Return the file name of 'page', or None if no such file exists.

        For every prefix in config.PAGES the plain name is tried first,
        then the name with a '.cept' extension; the first hit wins.
        """
        for path in config.PAGES:
            for candidate in (path + page, path + page + ".cept"):
                if os.path.isfile(candidate):
                    return candidate
        return None

    @staticmethod
    def exists(page):
        """
        Check whether the page (i.e. its file) exists.

        Returns True if a file for 'page' is found, False otherwise.
        """
        return rtxPage._find_file(page) is not None

    def get_page(self):
        """
        Return the page data.

        Returns False if the page data is the empty string, i.e. no page
        has been loaded or set.
        """
        if self.die_seite != "":
            return self.die_seite
        return False

    def set_page(self, pagedata):
        """
        Store 'pagedata' as the new page and rebuild the link list.

        'pagedata' may be bytes-like (as read from a page file) or an
        already decoded str.
        """
        self.die_seite = pagedata
        self.link_liste = self._get_link_list()

    def get_links(self):
        """
        Return the dict of links found in this page (link -> target).
        """
        return self.link_liste

    def get_link(self, link):
        """
        Return the target for a given link.

        Returns False if this link does not exist.
        """
        return self.link_liste.get(link, False)

    def get_page_id(self):
        """
        Return the id of the current page (-1 if none has been loaded).
        """
        return self.seiten_nummer

    def _load_page(self, page):
        """
        Load a CEPT page from the file system.

        Returns False if the page cannot be found.
        Returns True if the page has been loaded.
        Errors raised while opening or reading the file (OSError) are
        not caught here.
        """
        filename = self._find_file(page)
        if filename is None:
            # the page does not exist under any configured prefix
            return False

        # read raw bytes; decoding only happens while parsing the links
        with open(filename, "rb") as f:
            self.die_seite = f.read()
        self.seiten_nummer = page
        self.link_liste = self._get_link_list()
        return True

    def _get_link_list(self):
        """
        Parse the current page data and extract the link list.

        Returns a dict mapping each link to its target; the dict is
        empty if no links have been found.
        """
        data = self.die_seite
        # str(x, "latin-1") only works on bytes-like data and raises
        # TypeError for str, so already decoded data is used as it is.
        if isinstance(data, str):
            text = data
        else:
            text = str(data, "latin-1")

        liste = {}
        for item in self._LINK_PATTERN.findall(text):
            # Entries whose first character is "0" are skipped. For all
            # others characters 1-2 are the link and the rest the target.
            if item[0] != "0":
                link = item[1:3].strip()
                target = item[3:].strip()
                liste[link] = target

        logging.info(liste)
        return liste