108 lines
2.4 KiB
Python
108 lines
2.4 KiB
Python
# -*- coding: UTF-8 -*-
|
|
'''
|
|
rtx - RetroText
|
|
rtxPage: Klasse zur Benutzung einer CEPT-Seite
|
|
by Anna Christina Naß <acn@acn.wtf>
|
|
released under GPL
|
|
'''
|
|
|
|
import os
|
|
import re
|
|
import config
|
|
import logging
|
|
|
|
class rtxPage:
    """Class for working with a CEPT page.

    An instance holds the raw page data, the id of the page it was loaded
    from, and a dictionary of links extracted from the page data.
    """

    # Class-level defaults, kept so that attribute access on the class itself
    # keeps working. Instances always get their own copies in __init__; the
    # dictionary below must never be mutated in place.
    die_seite = ""
    seiten_nummer = -1
    link_liste = {}

    # A link entry starts with the two characters 0x1F 0x3D and runs up to
    # (not including) the next 0x1F, 0x9B or 0x1B character.
    _LINK_PATTERN = re.compile("\x1f\x3d([^\x1f\x9b\x1b]+)")

    def __init__(self, page=None):
        """Create a new page object, optionally loading the page 'page'.

        If 'page' is None an empty page object is created. Otherwise the
        page is loaded from the file system via _load_page(). A constructor
        cannot return a status, so a failed load is silent: the object simply
        stays empty (get_page() returns False, get_page_id() returns -1).
        """
        # Always create per-instance state so that instances never share the
        # class-level dictionary.
        self.die_seite = ""
        self.seiten_nummer = -1
        self.link_liste = {}

        if page is not None:
            self._load_page(page)

    @staticmethod
    def exists(page):
        """Check whether the page (i.e. the file config.PAGES + page) exists."""
        return os.path.isfile(config.PAGES + page)

    def get_page(self):
        """Return the page data if a page has been loaded.

        Returns False if no page has been loaded (page data is "").
        """
        if self.die_seite != "":
            return self.die_seite
        return False

    def set_page(self, pagedata):
        """Store 'pagedata' as the new page and rebuild the link list from it.

        'pagedata' may be bytes (as read from a page file) or str.
        """
        self.die_seite = pagedata
        # _get_link_list() updates self.link_liste itself and only returns a
        # found/not-found flag, so its result must not be stored as the list.
        self._get_link_list()

    def get_links(self):
        """Return the dictionary of links found in this page."""
        return self.link_liste

    def get_link(self, link):
        """Return the target for a given link.

        Returns False if this link does not exist.
        """
        if link in self.link_liste:
            return self.link_liste[link]
        return False

    def get_page_id(self):
        """Return the id of the current page (-1 if none has been loaded)."""
        return self.seiten_nummer

    def _load_page(self, page):
        """Load a CEPT page from the file system.

        The file name is config.PAGES + page; the file is read in binary mode.

        Returns False if the file cannot be found.
        Returns True if the page has been loaded.
        """
        filename = config.PAGES + page

        if not os.path.isfile(filename):
            return False

        with open(filename, "rb") as f:
            self.die_seite = f.read()
        self.seiten_nummer = page
        self._get_link_list()
        return True

    def _get_link_list(self):
        """Parse the current page data and rebuild self.link_liste.

        Every match of _LINK_PATTERN is one entry. Entries whose first
        character is "0" are skipped. For all others, characters 1-2
        (stripped) are the link key and the remainder (stripped) is the
        link target.

        Returns True if links have been found.
        Returns False if no links have been found.
        """
        data = self.die_seite
        # Page files are read as bytes; latin-1 maps every byte value to the
        # code point of the same number, so the control characters in the
        # pattern match unchanged. str input is used as it is.
        text = data if isinstance(data, str) else str(data, "latin-1")

        # Build a fresh dictionary so links of a previously set page do not
        # leak into this one and the class-level default is never touched.
        found = {}
        for item in self._LINK_PATTERN.findall(text):
            if item[0] != "0":
                found[item[1:3].strip()] = item[3:].strip()
        self.link_liste = found

        logging.info(self.link_liste)

        return bool(found)
|
|
|