1
0
rtx/rtxPage.py

112 lines
3.1 KiB
Python

# -*- coding: UTF-8 -*-
'''
rtx - RetroText
rtxPage: class for working with a CEPT page
by Anna Christina Naß <acn@acn.wtf>
released under GPL
'''
import os
import re
import config
import logging
class rtxPage:
    """Represents a single CEPT page and the links found inside it.

    A page is either loaded from disk (looked up first below ``config.PAGES``
    and then below ``config.DEMOPAGES``) or supplied directly via
    :meth:`set_page`.
    """

    # Class-level defaults are kept for backward compatibility with code that
    # reads them from the class; every instance gets its own values in
    # __init__, so the mutable dict below is never shared between instances.
    die_seite = ""
    seiten_nummer = -1
    link_liste = {}

    # Matches the two characters 0x1f '=' followed by everything up to (but
    # not including) the next 0x1f, 0x9b or 0x1b character.
    _LINK_PATTERN = re.compile("\x1f\x3d([^\x1f\x9b\x1b]+)")

    def __init__(self, page=None):
        """Create a page object, optionally loading the page named ``page``.

        ``page`` is the file name of the page relative to the configured page
        directories. When it is ``None`` an empty page object is created.

        A constructor cannot report success; if the page file cannot be found
        the object simply stays empty (``get_page()`` returns ``False`` and
        ``get_page_id()`` returns ``-1``). Use :meth:`exists` beforehand to
        check whether a page is available.
        """
        # Always create per-instance state so that no instance ever falls back
        # to (and possibly mutates) the shared class-level dictionary.
        self.die_seite = ""
        self.seiten_nummer = -1
        self.link_liste = {}
        if page is not None:
            self._load_page(page)

    @staticmethod
    def _find_file(page):
        """Return the path of the file for ``page`` or ``None`` if not found.

        ``config.PAGES`` takes precedence over ``config.DEMOPAGES``.
        """
        for base in (config.PAGES, config.DEMOPAGES):
            candidate = base + page
            if os.path.isfile(candidate):
                return candidate
        return None

    @staticmethod
    def exists(page):
        """Return ``True`` if a file for ``page`` exists, ``False`` otherwise."""
        return rtxPage._find_file(page) is not None

    def get_page(self):
        """Return the page data, or ``False`` if no page has been loaded."""
        if self.die_seite != "":
            return self.die_seite
        return False

    def set_page(self, pagedata):
        """Use ``pagedata`` as the new page content and re-parse its links.

        ``pagedata`` may be ``bytes`` (as read from a page file) or ``str``.
        """
        self.die_seite = pagedata
        self.link_liste = self._get_link_list()

    def get_links(self):
        """Return the dictionary mapping link keys to targets for this page."""
        return self.link_liste

    def get_link(self, link):
        """Return the target for ``link``, or ``False`` if it does not exist."""
        return self.link_liste.get(link, False)

    def get_page_id(self):
        """Return the id of the current page (``-1`` if none was loaded)."""
        return self.seiten_nummer

    def _load_page(self, page):
        """Load the page ``page`` from the file system.

        Returns ``True`` if the page has been loaded and ``False`` if no
        matching file exists. Errors raised while opening or reading the file
        are propagated to the caller.
        """
        filename = self._find_file(page)
        if filename is None:
            return False
        with open(filename, "rb") as f:
            self.die_seite = f.read()
        self.seiten_nummer = page
        self.link_liste = self._get_link_list()
        return True

    def _get_link_list(self):
        """Parse the current page data and return its links as a dictionary.

        Every match of ``_LINK_PATTERN`` is treated as one entry: entries
        whose first character is ``"0"`` are skipped; otherwise characters
        1-2 (stripped) become the key and the remainder (stripped) the target.
        The returned dictionary is empty when no links are found.
        """
        data = self.die_seite
        # Page files are read as bytes; latin-1 maps every byte to exactly one
        # character, so control bytes survive decoding. Data that already is
        # a str is used as-is (str(str_obj, encoding) would raise TypeError).
        text = data if isinstance(data, str) else str(data, "latin-1")
        liste = {}
        for item in self._LINK_PATTERN.findall(text):
            if item[0] != "0":
                liste[item[1:3].strip()] = item[3:].strip()
        logging.info(liste)
        return liste