From a6b691e7db7f6a3267afd431a3a6adb1144bdd8d Mon Sep 17 00:00:00 2001 From: "A. Svensson" Date: Sun, 8 Feb 2015 14:38:02 +0100 Subject: [PATCH] Initial commit. --- LICENSE | 16 +++++++++ README.md | 1 + requirements.txt | 3 ++ src/parser.py | 94 ++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 114 insertions(+) create mode 100644 LICENSE create mode 100644 README.md create mode 100644 requirements.txt create mode 100755 src/parser.py diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..079179f --- /dev/null +++ b/LICENSE @@ -0,0 +1,16 @@ +The MIT License (MIT) +Copyright (c) 2015 A. Svensson +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#!/usr/bin/env python
'''Scrape the BYOND hub page for Space Station 13 and list its servers.

The module downloads (or reads from a local file) the hub page, walks
every ``live_game_status`` div and turns each one into a plain dict with
the keys ``title``, ``game_url``, ``site_url`` and ``player_count``.

Third-party dependencies (as listed in the accompanying requirements.txt):
    requests
    beautifulsoup4
'''

import re
import logging

import requests
from bs4 import BeautifulSoup


URL = 'http://www.byond.com/games/exadv1/spacestation13'
# Raw string: '\d' in a normal string literal is an invalid escape sequence.
PLAYER_COUNT = re.compile(r'Logged in: (\d+) player')
# Seconds to wait for the hub before giving up; requests has no default
# timeout and would otherwise block forever on a stalled connection.
DEFAULT_TIMEOUT = 10


logging.basicConfig(
    format='%(asctime)s %(levelname)s %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)


class ServerParser(object):
    '''Fetch the hub page and extract one dict per listed server.'''

    def __init__(self, url=URL, timeout=DEFAULT_TIMEOUT):
        '''Create a parser.

        url     -- an http(s) address, or a path to a local HTML dump.
        timeout -- seconds passed to ``requests.get`` for remote urls.
        '''
        self.url = url
        self.timeout = timeout

    def run(self):
        '''Run the parser and return a list of dicts, one per server.'''
        raw_data = self._download_data()
        return self._parse_data(raw_data)

    def _download_data(self):
        '''Return the stripped page text, from the web or a local file.'''
        logger.info('Downloading data from %s ...', self.url)
        if self.url.startswith(('http://', 'https://')):
            response = requests.get(self.url, timeout=self.timeout)
            return response.text.strip()

        # HACK: In case of local testing or debugging, since requests can't
        # handle local files.
        logger.debug('Opening local file...')
        with open(self.url, 'r') as f:
            return f.read().strip()

    def _parse_data(self, raw_data):
        '''Parse the raw HTML and return the list of server dicts.'''
        logger.info('Parsing raw data...')
        # Name the parser explicitly so the result does not depend on which
        # optional parsers happen to be installed on the machine.
        soup_data = BeautifulSoup(raw_data, 'html.parser')
        servers = [
            self._parse_server_data(server_data)
            for server_data in soup_data.find_all('div', 'live_game_status')
        ]
        logger.info('Number of servers parsed: %s', len(servers))
        return servers

    def _parse_server_data(self, data):
        '''Parse one server div into a dict.

        Returns a dict with ``title`` and ``game_url`` (``None`` when the
        corresponding tag is missing), ``site_url`` (``None`` when the
        server uses the default page or the link has no href, ``''`` for a
        bare ``http://``) and ``player_count`` (0 when no count is found).
        '''
        # A malformed entry must not abort the whole run, so missing tags
        # yield None instead of raising AttributeError.
        title_tag = data.find('b')
        title = title_tag.text.strip() if title_tag is not None else None

        url_tag = data.find('span', 'smaller')
        game_url = url_tag.text if url_tag is not None else None

        site_url = None
        link = data.find('a')
        # Default means the server hasn't set a custom site url
        if link is not None and link.text != 'Default':
            # Sometimes there's a tag without a href attribute -> None
            site_url = link.get('href')
            # Handle some funky servers...
            if site_url == 'http://':
                site_url = ''

        text = data.text
        player_count = 0
        if 'No players.' not in text:
            match = PLAYER_COUNT.search(text)
            # The status text may contain neither marker; keep 0 then.
            if match is not None:
                player_count = int(match.group(1))

        return dict(
            title=title,
            game_url=game_url,
            site_url=site_url,
            player_count=player_count,
        )


def main():
    '''Parse a local dump and print each server with its player count.'''
    # Use a local file instead when testing
    parser = ServerParser(url='./dump.html')
    for server in parser.run():
        # Parenthesised single argument works as a Python 2 print statement
        # and as a Python 3 print call.
        print('{}\nPlayers: {}\n'.format(
            server['title'], server['player_count']))


if __name__ == '__main__':
    main()