Repository URL to install this package:
|
Version:
0.5.1 ▾
|
waveglider-connect
/
wgc.py
|
|---|
import io
import logging
import os

import pandas as pd
import requests
# Module-level logging setup: INFO level, records carry timestamp, logger name and level.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)
# Login endpoint per institute ("sensor home"); the auth XML file is POSTed here.
auth_urls = dict(
    geomar="https://geomar.wgms.com/webservices/entityapi.asmx",
    marum="https://ub.wgms.com/webservices/entityapi.asmx",
)

# Export page per institute; the glider data is downloaded from here.
# NOTE(review): the GEOMAR export URL uses plain http while every other
# endpoint uses https -- confirm whether that is intentional.
data_urls = dict(
    geomar="http://geomar.wgms.com/pages/exportPage.aspx?viewid=69676&entitytype=42",
    marum="https://ub.wgms.com/pages/exportPage.aspx?viewid=70272&entitytype=42",
)
# Known vehicle IDs:
#   GEOMAR 1 (GEOSEA): 965
#   GEOMAR 2 (CVOO):   1045
#   GEOMAR 3 (OMAN):   3100
#   GEOMAR 4 (MOSES):  4349
#   MARUM:             1622
#
# Platform shortname -> vehicle id. Only the active (uncommented) entries
# can be resolved to a shortname.
vehicle_ids = {
    # 'WG_GMR1': 965,
    'WG_GMR2': 1045,
    # 'WG_GMR3': 3100,
    'WG_GMR4': 4349,
    # 'WG_M': 1622
}

# Reverse lookup: vehicle id -> platform shortname.
glider_ids = dict(zip(vehicle_ids.values(), vehicle_ids.keys()))
class WaveGliderConnectError(Exception):
    """Error raised by this module for configuration, transport and parsing failures."""
class Config(object):
    """Settings taken from environment variables.

    The environment is read once, when this class body is executed; changes
    made to the environment afterwards are not reflected in these attributes.
    """

    # Path of the authentication file for the GEOMAR server (None when unset).
    WGC_AUTH_FILE_GMR = os.environ.get('WGC_AUTH_FILE_GMR')
    # Path of the authentication file for the MARUM server (None when unset).
    WGC_AUTH_FILE_MARUM = os.environ.get('WGC_AUTH_FILE_MARUM')
def _fetch_clean_data(auth_file, sensor_home):
    """Download one institute's glider data and return the cleaned DataFrame."""
    wgconn = WaveGliderConnect(auth_file, sensor_home)
    wgconn.update_data()
    wgconn.drop_unused_glider_data()
    wgconn.drop_invalid_positions()
    return wgconn.df


def get_waveglider_data():
    """Gets data from all known wavegliders.

    **Make sure auth files are set via
    WGC_AUTH_FILE_GMR and WGC_AUTH_FILE_MARUM
    environment variables!**

    An institute without a configured auth file is skipped with a warning.

    Returns:
        A DataFrame with the cleaned data of every institute that could be
        queried (GEOMAR rows first, then MARUM rows, each keeping its own
        index), or ``None`` when no auth file is configured at all.

    Raises:
        WaveGliderConnectError: if authentication, download or parsing fails.
    """
    config = Config()
    sources = (
        ('geomar', 'GEOMAR', config.WGC_AUTH_FILE_GMR),
        ('marum', 'MARUM', config.WGC_AUTH_FILE_MARUM),
    )

    frames = []
    for sensor_home, label, auth_file in sources:
        if not auth_file:
            logger.warning('Cannot get data for %s Wavegliders, no auth file provided!', label)
            continue
        frames.append(_fetch_clean_data(auth_file, sensor_home))

    if not frames:
        return None
    if len(frames) == 1:
        return frames[0]
    # DataFrame.append was removed in pandas 2.0; concat is the equivalent.
    return pd.concat(frames)
class WaveGliderConnect(object):
    """Downloads and cleans Wave Glider data of one institute ("sensor home").

    Typical use::

        wgconn = WaveGliderConnect('/path/to/auth.xml', 'geomar')
        wgconn.update_data()
        wgconn.drop_unused_glider_data()
        wgconn.drop_invalid_positions()
        df = wgconn.df

    ``df`` is ``None`` until ``update_data`` has been called successfully.
    """

    # Columns of ``self.df`` after ``update_data``, in this order.
    _HARMONIZED_COLUMNS = (
        'platform_shortname',
        'obs_timestamp',
        'lat',
        'lon',
        'speed_over_ground',
        'heading',
    )

    def __init__(self, auth_xml_path, sensor_home, timeout=None):
        """Set up a session for one sensor home.

        Args:
            auth_xml_path: path of the XML file that is POSTed to the auth URL.
            sensor_home: key of ``auth_urls`` / ``data_urls`` (case-insensitive).
            timeout: optional timeout passed to every HTTP request; ``None``
                (the default) waits indefinitely, as before.

        Raises:
            WaveGliderConnectError: if ``sensor_home`` is not known.
        """
        self.df = None
        self.sensor_home = sensor_home.lower()
        self.auth_url = auth_urls.get(self.sensor_home)
        if not self.auth_url:
            raise WaveGliderConnectError(f'Unknown sensor home: {sensor_home}. Select from {auth_urls.keys()}')
        self.data_url = data_urls.get(self.sensor_home)
        if not self.data_url:
            raise WaveGliderConnectError(f'Unknown sensor home: {sensor_home}. Select from {data_urls.keys()}')
        self.auth_xml = auth_xml_path
        self.timeout = timeout
        # One session for all requests so the login state is reused.
        self.session = requests.Session()

    def _auth(self):
        """POST the auth XML file to the auth URL; raise on a non-OK status."""
        auth_headers = {'Content-Type': 'text/xml; charset=utf-8', 'SOAPAction': None}
        with open(self.auth_xml, 'rb') as f:
            r = self.session.post(url=self.auth_url, headers=auth_headers, data=f, timeout=self.timeout)
        if not r.ok:
            raise WaveGliderConnectError(f'Could not authenticate! got status:{r.status_code} message: {r.text}')

    @staticmethod
    def _is_auth_response(response):
        """Checks if response from data request is an authorized response.

        Response from data url is 200 even if not authorised. In this case, an html page with an error
        is returned. This method checks if response is html or not to infer if authorisation is needed.
        """
        return not response.text.startswith('<!DOCTYPE html>')

    def _get_data_response(self):
        """GET the data URL and raise WaveGliderConnectError on a non-OK status."""
        r = self.session.get(self.data_url, timeout=self.timeout)
        if not r.ok:
            raise WaveGliderConnectError(f'Error fetching data from {self.data_url}:{r.status_code} message: {r.text}')
        return r

    def _poll_data(self):
        """Download the export and parse it as CSV, logging in first if needed.

        Returns:
            The raw export as a DataFrame.

        Raises:
            WaveGliderConnectError: on HTTP errors, if the server still answers
                with an HTML page after authentication, or if parsing fails.
        """
        r = self._get_data_response()
        if not WaveGliderConnect._is_auth_response(r):
            self._auth()
            r = self._get_data_response()
            # Fail clearly instead of feeding the HTML error page to the CSV parser.
            if not WaveGliderConnect._is_auth_response(r):
                raise WaveGliderConnectError(
                    f'Error fetching data from {self.data_url}: still got an HTML page after authentication')
        try:
            # pd.compat.StringIO no longer exists in current pandas; use the stdlib one.
            return pd.read_csv(io.StringIO(r.text))
        except Exception as exc:
            raise WaveGliderConnectError('Error parsing data!') from exc

    def update_data(self):
        """Polls data from Wave Glider Server and harmonizes data format.

        Afterwards ``self.df`` holds exactly the columns ``platform_shortname``,
        ``obs_timestamp``, ``lat``, ``lon``, ``speed_over_ground`` and ``heading``.
        Vehicles whose id is not in ``glider_ids`` get a missing shortname.
        """
        df = self._poll_data()
        glider_info = [f'{v_id}:{glider_ids.get(v_id)}' for v_id in df['Vehicle'].unique()]
        logger.info(f"Got data for gliders {glider_info}")
        # we need shortnames, not vehicle ids
        df['platform_shortname'] = [glider_ids.get(v_id) for v_id in df['Vehicle']]
        # harmonize column names; timestamps are parsed day-first
        df['obs_timestamp'] = pd.to_datetime(df['TimeStamp'], dayfirst=True)
        df['lat'] = df['Lat (deg)']
        df['lon'] = df['Lon (deg)']
        df['speed_over_ground'] = df['Ground Speed(kt)']
        df['heading'] = df['POG (deg)']
        # keep only the harmonized columns
        self.df = df[list(self._HARMONIZED_COLUMNS)].copy()

    def drop_unused_glider_data(self):
        """Drop rows of gliders we are not interested in.

        Data from the server contains gliders whose shortname could not be
        resolved from the vehicle id; those rows have a missing shortname and
        are removed in place.
        NOTE(review): this drops every row with a missing value in ANY column,
        not only rows without a shortname -- confirm that this is intended.
        """
        self.df.dropna(axis=0, inplace=True)

    def drop_invalid_positions(self):
        """Data can contain invalid positions, e.g. if gps is not (yet) initialized. These positions are removed.

        Invalid positions are always (0,0). Rows are removed in place.
        """
        at_origin = (self.df['lat'] == 0.0) & (self.df['lon'] == 0.0)
        self.df.drop(index=self.df.index[at_origin], inplace=True)

    def get_data_for_glider(self, glider_shortname):
        """Return the rows of ``self.df`` that belong to one glider.

        Args:
            glider_shortname: a key of ``vehicle_ids``, e.g. ``'WG_GMR2'``.

        Returns:
            The matching rows as a DataFrame, or ``None`` if no data has been
            loaded yet.

        Raises:
            WaveGliderConnectError: if the shortname is not a known glider.
        """
        if glider_shortname not in vehicle_ids:
            raise WaveGliderConnectError(f'Unknown glider: {glider_shortname}')
        if self.df is None:
            return None
        # 'Vehicle' is dropped by update_data; the shortname column identifies the glider.
        return self.df[self.df['platform_shortname'] == glider_shortname]