1
0
rtx/rtxPage.py

124 lines
3.7 KiB
Python
Raw Normal View History

2016-05-23 16:01:04 +02:00
# -*- coding: UTF-8 -*-
'''
rtx - RetroText
rtxPage: class for working with a CEPT page
by Anna Christina Naß <acn@acn.wtf>
released under GPL
'''
import os
import re
import config
import logging
class rtxPage:
    """Hold one CEPT page and the link table parsed out of it.

    The page content is read from a file located below one of the path
    prefixes listed in ``config.PAGES``. Each prefix is concatenated
    directly with the page name, so it is expected to end with a path
    separator.
    """

    # Class-level defaults, kept so that attribute access on the class itself
    # keeps working. Every instance gets its own copies in __init__, so the
    # mutable dict below is never shared between instances.
    die_seite = ""       # raw page data ("" until something is loaded/set)
    seiten_nummer = -1   # page id as passed to the loader, -1 if none
    link_liste = {}      # maps link -> target

    # Matches the two characters 0x1F 0x3D followed by everything up to the
    # next 0x1F, 0x9B or 0x1B (presumably the CEPT sequence that defines a
    # link/shortcut -- confirm against the CEPT spec).
    _LINK_RE = re.compile("\x1f\x3d([^\x1f\x9b\x1b]+)")

    def __init__(self, page=None):
        """Create a page object and, if ``page`` is given, try to load it.

        ``__init__`` cannot report success or failure; use ``get_page()``
        afterwards (it returns False when nothing has been loaded).

        Args:
            page: name of the page to load, or None for an empty object.
        """
        # Always start from per-instance state, even when no page is given,
        # so instances never fall back to the shared class-level dict.
        self.die_seite = ""
        self.seiten_nummer = -1
        self.link_liste = {}
        if page is not None:
            self._load_page(page)

    @staticmethod
    def _find_file(page):
        """Return the file name for ``page``, or None if there is none.

        For every prefix in ``config.PAGES`` the following names are tried
        in order: the page as given, with ``.cept`` appended, lowercased,
        and lowercased with ``.cept`` appended.
        """
        lowered = page.lower()
        candidates = (page, page + ".cept", lowered, lowered + ".cept")
        for path in config.PAGES:
            for candidate in candidates:
                filename = path + candidate
                if os.path.isfile(filename):
                    return filename
        return None

    @staticmethod
    def exists(page):
        """Return True if a file for ``page`` can be found, else False.

        Uses the same lookup as loading a page, so a True result means the
        page could be located at the time of the call.
        """
        return rtxPage._find_file(page) is not None

    def get_page(self):
        """Return the page data, or False if no page has been loaded."""
        if self.die_seite != "":
            return self.die_seite
        return False

    def set_page(self, pagedata):
        """Replace the page data and rebuild the link list from it.

        The page id is left untouched.
        """
        self.die_seite = pagedata
        self.link_liste = self._get_link_list()

    def get_links(self):
        """Return the dict of links (link -> target) found in this page."""
        return self.link_liste

    def get_link(self, link):
        """Return the target for ``link``, or False if it does not exist."""
        if link in self.link_liste:
            return self.link_liste[link]
        return False

    def get_page_id(self):
        """Return the id of the current page (-1 if none was loaded)."""
        return self.seiten_nummer

    def _load_page(self, page):
        """Load a CEPT page from the file system.

        On success the raw file content, the page id and the link list are
        stored on the instance.

        Returns:
            True if the page has been loaded, False if no file was found.
        """
        filename = self._find_file(page)
        if filename is None:
            # The page does not exist under any configured prefix.
            return False
        with open(filename, "rb") as f:
            self.die_seite = f.read()
        self.seiten_nummer = page
        self.link_liste = self._get_link_list()
        return True

    def _get_link_list(self):
        """Parse the current page data and extract its link list.

        Each match of the link pattern is split by position: the first
        character is a marker (entries whose marker is "0" are skipped),
        characters 1-2 are the link and the remainder is the target; link
        and target are stripped of surrounding whitespace.

        Returns:
            A dict mapping link -> target (empty if no links were found).
        """
        data = self.die_seite
        # Files are read as bytes; latin-1 maps every byte 1:1 to a code
        # point so the control characters survive decoding. Data that is
        # already text (e.g. the initial "" or a str given to set_page) is
        # used as is instead of raising a TypeError.
        if isinstance(data, (bytes, bytearray)):
            text = str(data, "latin-1")
        else:
            text = data
        liste = {}
        for item in self._LINK_RE.findall(text):
            if item[0] != "0":
                link = item[1:3].strip()
                target = item[3:].strip()
                liste[link] = target
        logging.info(liste)
        return liste