import numpy as np
import pandas as pd
import re
from pathlib import Path
import gzip

# Directory containing this module; the '<filetype>_format.txt' specification
# files read by get_column / print_description are looked up relative to it.
here = Path(__file__).parent

def read_wg(path):
    r"""
    Loads a wg file into a 2D numpy array of floats.
    Warnings: integer values come back as floats! Columns are indexed from 0 (the documentation counts from 1).
    Arguments:
        path : (str) full path of the .wg file
    Returns
        numpy array with one model per row and one variable per column.
        Example: data[:,0] is the model number (column #1 in the documentation)
    """
    table = pd.read_table(path, header=None)
    # Keep rows 0, 3, 6, ... of the first column; the rows in between hold non-relevant data.
    records = table[0].iloc[::3]
    # Parse each kept text row into a float vector, then stack the vectors into a matrix.
    parsed_rows = [np.fromstring(text, sep=" ") for text in records]
    return np.vstack(parsed_rows)

def read_v(path):
    r"""
    Reads the first data block of a gzip-compressed v file as a numpy array of floats.
    Warnings: integers are converted to floats! The numbering of columns starts with 0 (different than in the documentation!)
    If the v file contains more than one block of data (structure of the model), only the first block is taken and the rest is ignored.
    Arguments:
        path : (str) full path of the gzip-compressed v file
    Returns
        numpy array with the layers in the rows and the variables in the columns.
        Example: data[:,0] returns the layer number (column #1 in the documentation)
    Raises
        ValueError if no data line is found before the first block ends.
    """
    # Format specification:
    #   - any line containing '#' is a comment and is ignored as a whole;
    #   - data lines have 93 columns according to the format notes (detected here as >= 90 columns);
    #   - every shorter line (block separators have 5 entries) delimits a block.
    # The first short line opens the first block, the second one ends it.
    valid = []
    separator_seen = False
    # The context manager closes the file; iterating lazily avoids decompressing
    # the blocks after the first one, which are never used.
    with gzip.open(path, 'rt') as stream:
        for line in stream:
            if '#' in line:
                continue
            if len(line.split()) >= 90:
                valid.append(line)
            elif separator_seen:
                break
            else:
                separator_seen = True
    if not valid:
        raise ValueError(f"No data lines found in the first block of {path}")
    # convert each line to a numpy array, then stack them
    return np.vstack([np.fromstring(line, sep=" ") for line in valid])



def get_column(filetype,term1='',var='.*',esc=False,moreterms=()):
    r"""
    Searches for a column in the file specification and returns the first match.
    The search is case-insensitive. Each specification line is matched as
        <column number> <variable name> "<description>"
    Arguments:
        filetype: 'v' or 'g' (for wg files, use g).
        term1: (str) term to search in the description of the variables.
            A match is found if the term is contained within the description.
            Unless esc is set, it is a regex and every space in it matches one or more whitespace characters.
        var: search for a specific variable name. This matches exactly the variable (regex can be used).
        esc: if true, term1 is escaped and matched literally (default False, i.e., treat the string as regex).
            Note: esc only affects term1; var and moreterms are always treated as regex.
        moreterms: iterable of (str) further regex terms that must follow term1 in the description, in the given order.
            Every space in them matches zero or more whitespace characters.
    Returns:
        Column number of the first match starting with 0. If more than one variable is found, a list of results is printed.
        The returned index can directly be used to access the data in a numpy array
    Raises:
        Exception if no column matches (the searched regex is printed first).
    Requires:
        files 'g_format.txt' and 'v_format.txt' (in the same directory)
    Examples:
        get_column('g','mass') # prints all results that contain 'mass' in the description
        get_column('v',var='eps') # matches the variable name 'eps' only (but not 'epsx' or 'epsy', to get them as well, use r'eps\w*')
    """
    # read_text() opens and closes the file in one step (no dangling handle).
    format_description = (here / f"{filetype}_format.txt").read_text()
    if esc:
        term1 = re.escape(term1)
    else:
        term1 = term1.replace(" ",r"\s+")
    otherterms = [otherterm.replace(" ",r"\s*") for otherterm in moreterms]
    concat_otherterms = ".*"+".*".join(otherterms)
    # Named groups keep the three fields addressable even when `var` or the
    # search terms bring their own capture groups.
    tosearch = (r'^(?P<colnum>[0-9]+)\s+(?P<varname>'+var+r')\s+"(?P<descr>.*'
                +term1+concat_otherterms+')"')
    matches = list(re.finditer(tosearch, format_description, flags=re.M|re.I))
    if not matches:
        print(tosearch)
        raise Exception("Column not found")
    if len(matches) > 1:
        # Ambiguous query: list every candidate, then fall through to the first one.
        print(f"===== Matching columns in {filetype} file ========")
        for match in matches:
            print(match.group('colnum'),"\t",match.group('varname'),"\t",match.group('descr'))
        print("--------------------------------------------------")
    # The specification counts columns from 1, numpy indexes from 0.
    return int(matches[0].group('colnum'))-1

def print_description(filetype,column):
    r"""
    Prints the description of a column number
    Arguments:
        filetype: 'v' or 'g'
        column: (int) column number to query starting from 0 (not as in the format specification file)
    Raises:
        Exception if the specification has no line starting with column+1.
    Requires:
        files 'g_format.txt' and 'v_format.txt' (in the same directory)
    """
    # read_text() opens and closes the file in one step (no dangling handle).
    format_description = (here / f"{filetype}_format.txt").read_text()
    # The specification numbers columns from 1, hence column+1; the rest of the
    # line after the number is captured as the description.
    pattern = r'^('+str(column+1)+r')\s+(.*)'
    results = re.findall(pattern, format_description, flags=re.M|re.I)
    if not results:
        raise Exception("Column not found")
    print(f"===== Matching columns in {filetype} file ========")
    for result in results:
        print(column,"(+1)\t",result[1],"\t")
    print("-----------------------------------------")


class cgs:
    """Namespace of unit conversion factors and physical constants in cgs units."""
    # SI base and derived units expressed in cgs
    kg = 1e3  # g
    m = 1e2  # cm
    s = 1  # s
    N = kg*m/s**2  # dyne
    J = N * m  # erg
    Da = 1.66e-27*kg  # Dalton in g

    # Physical constants in cgs
    G = 6.67e-11 * N * kg**(-2) * m**2  # dyne * cm^2 * g^-2
    k_B = 1.38e-23 * J  # erg/K
    au = 1.495e11 * m  # cm
    pc = 3.09e16 * m  # cm
    Msun = 2e33  # g
    yr = 3600*24*365  # s
    c = 2.99792458e+10  # cm/s
    ly = 63241.0 * au  # cm
    Rsun = 6.957e10  # cm
    Lsun = 3.8427e33  # erg / s
    sigma = 5.67040e-5  # Stefan-Boltzmann constant, erg * s^-1 * cm^-2 * K^-4

