Source code for fhiry.fhiry

"""
 Copyright (c) 2020 Bell Eapen

 This software is released under the MIT License.
 https://opensource.org/licenses/MIT
"""

import pandas as pd
import json
import os
from .base_fhiry import BaseFhiry
import logging
from tqdm import tqdm

logger = logging.getLogger(__name__)

class Fhiry(BaseFhiry):
    """Load bundle JSON from a file or a folder into a pandas DataFrame.

    Reading and flattening of the JSON is done here; the subsequent
    clean-up of the DataFrame is delegated to ``process_df`` on the base
    class.
    """

    def __init__(self, config_json=None):
        # Empty strings mean "no source selected yet"; process_source()
        # relies on their falsiness to decide what to read.
        self._filename = ""
        self._folder = ""
        super().__init__(config_json=config_json)

    @property
    def df(self):
        """The DataFrame most recently read or processed."""
        return self._df

    @property
    def filename(self):
        """Path of the single bundle file to read."""
        return self._filename

    @filename.setter
    def filename(self, filename):
        # Assigning a filename eagerly loads that bundle into the
        # DataFrame (unprocessed) in addition to remembering the path.
        self._filename = filename
        self._df = self.read_bundle_from_file(filename)

    @property
    def folder(self):
        """Directory scanned for ``.json`` bundle files."""
        return self._folder

    @folder.setter
    def folder(self, folder):
        self._folder = folder

    @property
    def delete_col_raw_coding(self):
        """Value of the ``_delete_col_raw_coding`` setting.

        NOTE(review): the attribute is not initialised in this class;
        presumably the base class sets it -- confirm.
        """
        return self._delete_col_raw_coding

    @delete_col_raw_coding.setter
    def delete_col_raw_coding(self, delete_col_raw_coding):
        self._delete_col_raw_coding = delete_col_raw_coding

    def read_bundle_from_file(self, filename):
        """Read a JSON bundle file and flatten its entries.

        The file is opened as UTF-8 text, parsed as JSON, and the value
        under its ``'entry'`` key is passed to ``pandas.json_normalize``.

        Returns the resulting DataFrame. Raises ``KeyError`` when the
        parsed document has no ``'entry'`` key.
        """
        with open(filename, mode='r', encoding='utf8') as handle:
            bundle = json.load(handle)
        return pd.json_normalize(bundle['entry'])

    def process_source(self):
        """Read a single JSON resource or a directory full of JSON resources.

        When a folder is set, every file in it whose name ends with
        ``.json`` is read and processed in turn and the results are
        concatenated into one DataFrame. Otherwise, when a filename is
        set, that single file is read and processed. With neither set,
        nothing happens.

        NOTE(review): the original documentation states that ONLY COMMON
        FIELDS IN ALL resources will be mapped, while the frames are
        combined with ``pd.concat`` using its default join -- confirm
        which behaviour is intended.
        """
        if self._folder:
            combined = pd.DataFrame(columns=[])
            for entry_name in tqdm(os.listdir(self._folder)):
                if not entry_name.endswith(".json"):
                    continue
                bundle_path = os.path.join(self._folder, entry_name)
                self._df = self.read_bundle_from_file(bundle_path)
                self.process_df()
                # While nothing has been accumulated yet, adopt the
                # current frame as-is instead of concatenating onto an
                # empty one.
                combined = (
                    self._df
                    if combined.empty
                    else pd.concat([combined, self._df])
                )
            self._df = combined
            return

        if self._filename:
            self._df = self.read_bundle_from_file(self._filename)
            super().process_df()

    def process_file(self, filename):
        """Read one bundle file, process it, and return the DataFrame."""
        self._df = self.read_bundle_from_file(filename)
        self.process_df()
        return self._df

    def process_bundle_dict(self, bundle_dict):
        """Process an already-parsed bundle and return the DataFrame.

        The conversion itself is done by ``read_bundle_from_bundle_dict``,
        which is not defined in this class (expected on the base class).
        """
        self._df = self.read_bundle_from_bundle_dict(bundle_dict)
        self.process_df()
        return self._df