Initial commit.
This commit is contained in:
commit
a6b691e7db
16
LICENSE
Normal file
16
LICENSE
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
The MIT License (MIT)
|
||||||
|
Copyright (c) 2015 A. Svensson
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||||
|
this software and associated documentation files (the "Software"), to deal in
|
||||||
|
the Software without restriction, including without limitation the rights to
|
||||||
|
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||||
|
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||||
|
subject to the following conditions:
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||||
|
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||||
|
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||||
|
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
3
requirements.txt
Normal file
3
requirements.txt
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
|
||||||
|
requests
|
||||||
|
beautifulsoup4
|
||||||
94
src/parser.py
Executable file
94
src/parser.py
Executable file
@ -0,0 +1,94 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
import re
|
||||||
|
import logging
|
||||||
|
|
||||||
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
|
||||||
|
URL = 'http://www.byond.com/games/exadv1/spacestation13'
|
||||||
|
PLAYER_COUNT = re.compile('Logged in: (\d+) player')
|
||||||
|
|
||||||
|
|
||||||
|
logging.basicConfig(
|
||||||
|
format = '%(asctime)s %(levelname)s %(message)s',
|
||||||
|
level = logging.INFO,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class ServerParser(object):
|
||||||
|
def __init__(self):
|
||||||
|
self.url = URL
|
||||||
|
|
||||||
|
def run(self):
|
||||||
|
'''Run the parser and return a neat list of dicts containing server data.'''
|
||||||
|
raw_data = self._download_data()
|
||||||
|
servers = self._parse_data(raw_data)
|
||||||
|
return servers
|
||||||
|
|
||||||
|
def _download_data(self):
|
||||||
|
'''Download raw data, either from local file or a web page.'''
|
||||||
|
logging.info('Downloading data from {} ...'.format(self.url))
|
||||||
|
if self.url.startswith('http://') or self.url.startswith('https://'):
|
||||||
|
raw_data = requests.get(self.url).text.strip()
|
||||||
|
else:
|
||||||
|
# HACK: In case of local testing or debugging, since requests can't
|
||||||
|
# handle local files.
|
||||||
|
logging.debug('Opening local file...')
|
||||||
|
with open(self.url, 'r') as f:
|
||||||
|
raw_data = f.read().strip()
|
||||||
|
return raw_data
|
||||||
|
|
||||||
|
def _parse_data(self, raw_data):
|
||||||
|
'''Parse the raw data and run through all servers.'''
|
||||||
|
logging.info('Parsing raw data...')
|
||||||
|
servers = []
|
||||||
|
soup_data = BeautifulSoup(raw_data)
|
||||||
|
for server_data in soup_data.find_all('div', 'live_game_status'):
|
||||||
|
server = self._parse_server_data(server_data)
|
||||||
|
servers.append(server)
|
||||||
|
|
||||||
|
logging.info('Number of servers parsed: {}'.format(len(servers)))
|
||||||
|
return servers
|
||||||
|
|
||||||
|
def _parse_server_data(self, data):
|
||||||
|
'''Parse the individual parts of each server.'''
|
||||||
|
title = data.find('b').text.strip()
|
||||||
|
game_url = data.find('span', 'smaller').text
|
||||||
|
|
||||||
|
tmp = data.find('a')
|
||||||
|
site_url = None
|
||||||
|
# Default means the server hasn't set a custom site url
|
||||||
|
if tmp and not tmp.text == 'Default':
|
||||||
|
try:
|
||||||
|
site_url = tmp['href']
|
||||||
|
# Handle some funky servers...
|
||||||
|
if site_url == 'http://':
|
||||||
|
site_url = ''
|
||||||
|
except KeyError:
|
||||||
|
# Sometimes there's a <a> tag without a href attribute
|
||||||
|
pass
|
||||||
|
|
||||||
|
tmp = data.text
|
||||||
|
player_count = 0
|
||||||
|
if tmp.find('No players.') == -1:
|
||||||
|
data = PLAYER_COUNT.search(tmp)
|
||||||
|
player_count = int(data.group(1))
|
||||||
|
|
||||||
|
server = dict(
|
||||||
|
title = title,
|
||||||
|
game_url = game_url,
|
||||||
|
site_url = site_url,
|
||||||
|
player_count = player_count,
|
||||||
|
)
|
||||||
|
return server
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
parser = ServerParser()
|
||||||
|
parser.url = './dump.html' # Use a local file instead when testing
|
||||||
|
servers = parser.run()
|
||||||
|
for tmp in servers:
|
||||||
|
print '{}\nPlayers: {}\n'.format(tmp['title'], tmp['player_count'])
|
||||||
|
|
||||||
Loading…
x
Reference in New Issue
Block a user