Initial commit.

This commit is contained in:
A. Svensson 2015-02-08 14:38:02 +01:00
commit a6b691e7db
4 changed files with 114 additions and 0 deletions

16
LICENSE Normal file
View File

@@ -0,0 +1,16 @@
The MIT License (MIT)
Copyright (c) 2015 A. Svensson
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

1
README.md Normal file
View File

@@ -0,0 +1 @@
TODO

3
requirements.txt Normal file
View File

@@ -0,0 +1,3 @@
requests
beautifulsoup4

94
src/parser.py Executable file
View File

@@ -0,0 +1,94 @@
#!/usr/bin/env python
"""Scraper for the BYOND Space Station 13 hub page."""
import re
import logging

import requests
from bs4 import BeautifulSoup

# Hub page listing all public Space Station 13 servers.
URL = 'http://www.byond.com/games/exadv1/spacestation13'
# FIX: raw string -- '\d' in a plain string literal is an invalid escape
# sequence (DeprecationWarning since Python 3.6, SyntaxError eventually).
PLAYER_COUNT = re.compile(r'Logged in: (\d+) player')

logging.basicConfig(
    format='%(asctime)s %(levelname)s %(message)s',
    level=logging.INFO,
)
class ServerParser(object):
    """Scrape the SS13 hub page and extract per-server information.

    Attributes:
        url: Source to fetch. Defaults to the live hub page; may be set
            to a local file path for offline testing, since requests
            cannot read local files.
    """

    def __init__(self):
        self.url = URL

    def run(self):
        '''Run the parser and return a neat list of dicts containing server data.'''
        raw_data = self._download_data()
        return self._parse_data(raw_data)

    def _download_data(self):
        '''Download raw data, either from local file or a web page.'''
        logging.info('Downloading data from {} ...'.format(self.url))
        if self.url.startswith(('http://', 'https://')):
            return requests.get(self.url).text.strip()
        # HACK: In case of local testing or debugging, since requests can't
        # handle local files.
        logging.debug('Opening local file...')
        with open(self.url, 'r') as f:
            return f.read().strip()

    def _parse_data(self, raw_data):
        '''Parse the raw data and return a list of server dicts.'''
        logging.info('Parsing raw data...')
        # FIX: name the parser explicitly -- omitting it makes bs4 emit a
        # warning and pick whichever parser happens to be installed, so
        # results could differ between environments.
        soup_data = BeautifulSoup(raw_data, 'html.parser')
        servers = [
            self._parse_server_data(server_data)
            for server_data in soup_data.find_all('div', 'live_game_status')
        ]
        logging.info('Number of servers parsed: {}'.format(len(servers)))
        return servers

    def _parse_server_data(self, data):
        '''Extract title, urls and player count from one server <div> tag.'''
        title = data.find('b').text.strip()
        game_url = data.find('span', 'smaller').text
        link = data.find('a')
        site_url = None
        # 'Default' means the server hasn't set a custom site url.
        if link is not None and link.text != 'Default':
            try:
                site_url = link['href']
                # Handle some funky servers that publish a bare scheme.
                if site_url == 'http://':
                    site_url = ''
            except KeyError:
                # Sometimes there's an <a> tag without a href attribute.
                pass
        text = data.text
        player_count = 0
        if 'No players.' not in text:
            # FIX: guard the match -- the original crashed with
            # AttributeError on markup that matched neither 'No players.'
            # nor the 'Logged in: N player(s)' pattern.
            match = PLAYER_COUNT.search(text)
            if match:
                player_count = int(match.group(1))
        return dict(
            title=title,
            game_url=game_url,
            site_url=site_url,
            player_count=player_count,
        )
if __name__ == '__main__':
    parser = ServerParser()
    # NOTE(review): this unconditional override means the script ALWAYS
    # reads the local dump and never hits the live hub page -- looks like
    # leftover debug code; remove or gate it for production use.
    parser.url = './dump.html'  # Use a local file instead when testing
    servers = parser.run()
    for server in servers:
        # FIX: parenthesized single-argument print works on both
        # Python 2 and Python 3; the bare print statement is 2-only.
        print('{}\nPlayers: {}\n'.format(server['title'], server['player_count']))