| | import pandas as pd |
| | import numpy as np |
| | from matplotlib.colors import LinearSegmentedColormap |
| |
|
# CSS snippet: shifts `.reportview-container` up by 2em and hides the
# `#MainMenu`, `.stDeployButton`, `footer` and `#stDecoration` elements.
# NOTE(review): the selectors look like Streamlit page chrome; presumably this
# is injected as raw HTML by the app entry point - confirm against the caller.
PAGE_MARKDOWN = """
<style>
.reportview-container {
margin-top: -2em;
}
#MainMenu {visibility: hidden;}
.stDeployButton {display:none;}
footer {visibility: hidden;}
#stDecoration {display:none;}
</style>
"""
| |
|
# Markdown header line: pipe-separated links to the BABILong dataset, GitHub
# repository, paper and the 1k-samples dataset.
# NOTE(review): the first link has an empty label (`[](...)`), so it renders
# with no visible text - looks like a badge image was lost; confirm.
PAGE_INFO = """[](https://huggingface.co/datasets/RMT-team/babilong) | [GitHub](https://github.com/booydar/babilong) | [Paper](https://arxiv.org/abs/2406.10149) | [HF Dataset](https://huggingface.co/datasets/RMT-team/babilong) | [HF Dataset 1k samples per task](https://huggingface.co/datasets/RMT-team/babilong-1k-samples) |"""
| |
|
# Result-column labels, listed from the shortest to the longest length.
# load_results() reads these as DataFrame column names.
LENGTHS = [
    '0k', '1k', '2k', '4k', '8k', '16k', '32k',
    '64k', '128k',
    '512k', '1M', '2M', '10M',
]

# Prefixes of LENGTHS up to and including the named bucket; used for the
# "≤32k" and "≤128k" aggregate columns.
LENGTHS_32k = LENGTHS[:LENGTHS.index('32k') + 1]
LENGTHS_128k = LENGTHS[:LENGTHS.index('128k') + 1]
| |
|
| |
|
def load_results(
    old_results_path="data/leaderboard-v0_results.csv",
    new_results_path="babilong/babilong_results/all_results.csv",
):
    """
    Load the legacy and current result tables and merge them into one ranking.

    Rows of the legacy table are dropped when the same model (compared by the
    lower-cased part of ``model_name`` after the last ``/``) also appears in
    the current table, so the current results take precedence.

    Parameters
    ----------
    old_results_path : str
        CSV file with the legacy results.
    new_results_path : str
        CSV file with the current results.

    Returns
    -------
    pandas.DataFrame
        The merged table with ``-1`` values replaced by NaN and four extra
        columns: ``≤32k`` and ``≤128k`` (row means over ``LENGTHS_32k`` /
        ``LENGTHS_128k``), ``max_eval_length_idx`` (position in ``LENGTHS`` of
        the last non-NaN length column, ``-1`` if there is none) and
        ``max_eval_length`` (the matching label, NaN if there is none).
        Rows are sorted by ``max_eval_length_idx`` and then ``≤128k``, both
        descending.
    """
    old_results = pd.read_csv(old_results_path)
    new_results = pd.read_csv(new_results_path)

    def normalize_model_name(name):
        # "org/Model-Name" and "model-name" must compare equal.
        return name.split('/')[-1].lower()

    old_keys = old_results['model_name'].apply(normalize_model_name)
    new_keys = set(new_results['model_name'].apply(normalize_model_name))

    # Keep only the legacy rows whose model has not been re-evaluated.
    res = pd.concat([old_results[~old_keys.isin(new_keys)], new_results])

    # Literal substring replacement; regex=False pins the behaviour because
    # the pandas default for this flag differs between major versions.
    res['task'] = res['task'].str.replace('avg', 'avg(qa1-5)', regex=False)

    # -1 is turned into NaN so that it is skipped by the means below.
    res = res.replace(-1, np.nan)
    res['≤32k'] = res[LENGTHS_32k].mean(axis=1)
    res['≤128k'] = res[LENGTHS_128k].mean(axis=1)

    # For each row, find the position of the longest length that has a value.
    evaluated = res[LENGTHS].notna().to_numpy()
    max_idx = [
        max((i for i, has_value in enumerate(row) if has_value), default=-1)
        for row in evaluated
    ]
    res['max_eval_length_idx'] = max_idx
    # A row without any value must not be looked up as LENGTHS[-1] (which
    # would report the longest length); mark it as missing instead.
    res['max_eval_length'] = [LENGTHS[i] if i >= 0 else np.nan for i in max_idx]

    res = res.sort_values(['max_eval_length_idx', '≤128k'], ascending=[False, False])

    return res
| |
|
| |
|
| | |
def relative_luminance(rgba) -> float:
    """
    Compute the relative luminance of a colour.

    Follows the W3C definition
    (https://www.w3.org/WAI/GL/wiki/Relative_luminance).

    Parameters
    ----------
    rgba : rgb or rgba sequence
        Only the first three entries (red, green, blue) are used; a fourth
        (alpha) entry is ignored.

    Returns
    -------
    float
        The relative luminance as a value from 0 to 1
    """
    linear_channels = []
    for channel in rgba[:3]:
        # Piecewise transfer function: linear segment for small values,
        # power curve above the threshold.
        if channel <= 0.04045:
            linear_channels.append(channel / 12.92)
        else:
            linear_channels.append(((channel + 0.055) / 1.055) ** 2.4)

    red, green, blue = linear_channels
    return 0.2126 * red + 0.7152 * green + 0.0722 * blue
| |
|
| |
|
def style_dataframe(df):
    """
    Style a pandas DataFrame with a red-yellow-green colour gradient.

    Every column except the first is coloured cell by cell. Values are
    normalised against a fixed 0-100 range before the colour lookup, and the
    text colour is switched between white and black depending on the
    background's relative luminance. NaN cells are rendered white on white.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to style; it is copied, not modified.

    Returns
    -------
    pandas.io.formats.style.Styler
        Styler over the copy with the per-cell CSS applied.
    """
    # Build the colormap once per call rather than once per cell.
    cmap = LinearSegmentedColormap.from_list('ryg', ["red", "yellow", "green"], N=256)
    min_val = 0
    max_val = 100

    def color_scale(val):
        if pd.isna(val):
            return 'background-color: white; color: white;'

        normalized = (val - min_val) / (max_val - min_val)
        rgba = cmap(normalized)
        text_color = 'white' if relative_luminance(rgba) < 0.408 else 'black'
        return f'background-color: rgba({rgba[0]*255},{rgba[1]*255},{rgba[2]*255},{rgba[3]}); color: {text_color}'

    styler = df.copy().style
    numeric_columns = df.columns[1:]

    # Styler.map is the current element-wise API; older pandas releases only
    # provide it under the name applymap.
    apply_cellwise = getattr(styler, 'map', None) or styler.applymap
    return apply_cellwise(color_scale, subset=numeric_columns)
| |
|