Skip to content

Commit

Permalink
update-layout-add-evaluation (#17)
Browse files Browse the repository at this point in the history
- add comment to divide functions/ui (c2fbbc311616f60f3dbc1e546d20517329b41486)
- fix typo (3ef1fed40b702eb482f9017241a2d3641cde3946)
- move sign in button to another column (ea29202ac74e3b567f8bbc83f2dd01edeb7e00b5)
- make sign in button smaller (3b5e775206b9e2276d8d5afd14d5a2ae6eb0f852)
- remove repeated import (9c1769a069aa5cadade2e120ebbfbb3524b4a71c)
- move sign in button to the right (5d91425bc46bbed12e76279e32eed828738f2e78)
- modify column width and typos (2b5c2e3fa953fcd1e7bbc5d50fa1eaa18cf51a7f)
- update successful message and pipeline code (4234ad816ad254da4dc0a2bdf4f6ee901f3ab647)
- update dataframe visualizations (7350fc6b3cdabd3c88562f7ebea772ea936b293b)
- update text and order parameter layout (45693e1d9d5340197d0a5298329a1b176836e5e9)
- typo (2673ebc69f8b3ee53ca0bea400abe8b18dcec6c7)
- add temperature for system prompt (857f1ba71f10ddb10f045923601746daed130b19)
- update textcat (separate prompt and labels) and use input parameters (4e193106207eda3f59650448038a680c25075972)
- update sft and use input parameters (dea11022bc5c78e08481e4e90bbb73b0402cdadc)
- update push dataset (49d5948eb076fc8b3354a9d4acdaac477fc0c398)
- add evaluation task (34371d30aa99cdb709c9def84739ab3b8b7fa611)
- hide pipeline ui each time it generates (c26510fcff621c6a144917e1a56d5f87dd41fd41)
- move order hide pipeline ui (1b00519115b913bff86a6f2ba061f97eb860e78a)
- merge remote tracking branch (1c412e2113c3889b13572af931a3be19fc93df5a)
  • Loading branch information
Sara Han Díaz authored and system committed Dec 3, 2024
1 parent 7afacd5 commit fb096d2
Show file tree
Hide file tree
Showing 11 changed files with 1,425 additions and 555 deletions.
6 changes: 3 additions & 3 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from src.distilabel_dataset_generator._tabbedinterface import TabbedInterface
from src.distilabel_dataset_generator.apps.faq import app as faq_app
from src.distilabel_dataset_generator.apps.sft import app as sft_app
from src.distilabel_dataset_generator.apps.eval import app as eval_app
from src.distilabel_dataset_generator.apps.textcat import app as textcat_app

theme ='argilla/argilla-theme'
Expand All @@ -25,12 +26,11 @@
"""

demo = TabbedInterface(
[textcat_app, sft_app, faq_app],
["Text Classification", "Supervised Fine-Tuning", "FAQ"],
[textcat_app, sft_app, eval_app, faq_app],
["Text Classification", "Supervised Fine-Tuning", "Evaluation", "FAQ"],
css=css,
title="""
<h1>Synthetic Data Generator</h1>
<h3>Build datasets using natural language</h3>
""",
head="Synthetic Data Generator",
theme=theme,
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ authors = [
{name = "davidberenstein1957", email = "[email protected]"},
]
dependencies = [
"distilabel[hf-inference-endpoints,argilla,outlines]>=1.4.1",
"distilabel[hf-inference-endpoints,argilla,outlines,instructor]>=1.4.1",
"gradio[oauth]<5.0.0",
"transformers>=4.44.2",
"sentence-transformers>=3.2.0",
Expand Down
6 changes: 4 additions & 2 deletions src/distilabel_dataset_generator/_tabbedinterface.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,10 +63,12 @@ def __init__(
if title:
HTML(value=title)
with gr.Row():
with gr.Column(scale=1):
gr.LoginButton(value="Sign in!", variant="hf-login", size="sm", scale=2)
with gr.Column(scale=2):
gr.Markdown("### Build datasets using natural language")
with gr.Column(scale=3):
pass
with gr.Column(scale=2):
gr.LoginButton(value="Sign in!", variant="hf-login", size="sm", scale=2)
with Tabs():
for interface, tab_name in zip(interface_list, tab_names, strict=False):
with Tab(label=tab_name):
Expand Down
49 changes: 16 additions & 33 deletions src/distilabel_dataset_generator/apps/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
get_argilla_client,
get_login_button,
list_orgs,
swap_visibilty,
swap_visibility,
)

TEXTCAT_TASK = "text_classification"
Expand Down Expand Up @@ -137,7 +137,7 @@ def fn_generate_sample_dataset(system_prompt, progress=gr.Progress()):
show_progress=True,
)

app.load(fn=swap_visibilty, outputs=main_ui)
app.load(fn=swap_visibility, outputs=main_ui)
app.load(get_org_dropdown, outputs=[org_name])

return (
Expand Down Expand Up @@ -300,25 +300,6 @@ def get_iterate_on_sample_dataset_ui(
)


def get_pipeline_code_ui(pipeline_code: str) -> gr.Code:
    """Build the "run with distilabel" section and return its code viewer.

    Creates a heading and a horizontal rule, followed by an accordion that is
    collapsed by default. The accordion holds a short explanatory note and a
    Python code component pre-filled with ``pipeline_code``.

    NOTE(review): presumably meant to be called inside an active
    ``gr.Blocks`` layout context -- confirm against the callers.

    Args:
        pipeline_code: Text used as the initial value of the code component.

    Returns:
        The ``gr.Code`` component created inside the accordion.
    """
    gr.Markdown("## Customize and run with distilabel")
    gr.HTML("<hr>")

    accordion = gr.Accordion("Run this pipeline using distilabel", open=False)
    with accordion:
        gr.Markdown(
            "You can run this pipeline locally with distilabel. For more information, please refer to the [distilabel documentation](https://distilabel.argilla.io/) or go to the FAQ tab at the top of the page for more information."
        )
        # Use a distinct local name so the string parameter is not rebound
        # to the component object.
        code_view = gr.Code(
            value=pipeline_code,
            language="python",
            label="Distilabel Pipeline Code",
        )
    return code_view


def get_argilla_tab() -> Tuple[Any]:
with gr.Tab(label="Argilla"):
if get_argilla_client() is not None:
Expand Down Expand Up @@ -492,33 +473,35 @@ def get_success_message_row() -> gr.Markdown:
return success_message


def show_success_message_hub(org_name, repo_name) -> gr.Markdown:
def show_success_message(org_name, repo_name) -> gr.Markdown:
client = get_argilla_client()
argilla_api_url = client.api_url
return gr.Markdown(
value=f"""
<div style="padding: 1em; background-color: #e6f3e6; border-radius: 5px; margin-top: 1em;">
<h3 style="color: #2e7d32; margin: 0;">Dataset Published Successfully!</h3>
<p style="margin-top: 0.5em;">
Your dataset is now available the Hugging Face Hub:
<a href="https://huggingface.co/datasets/{org_name}/{repo_name}" target="_blank" style="color: #1565c0; text-decoration: none;">
https://huggingface.co/datasets/{org_name}/{repo_name}
</a>
<strong>
<a href="{argilla_api_url}" target="_blank" style="color: #1565c0; text-decoration: none;">
Open your dataset in the Argilla space
</a>
</strong>
</p>
<p style="margin-top: 0.5em;">
Your dataset is now available within Argilla:
<a href="{argilla_api_url}" target="_blank" style="color: #1565c0; text-decoration: none;">
{argilla_api_url}
The generated dataset is in the right format for fine-tuning with TRL, AutoTrain, or other frameworks. Your dataset is now available at:
<a href="https://huggingface.co/datasets/{org_name}/{repo_name}" target="_blank" style="color: #1565c0; text-decoration: none;">
https://huggingface.co/datasets/{org_name}/{repo_name}
</a>
<br>Unfamiliar with Argilla? Here are some docs to help you get started:
<br>• <a href="https://docs.argilla.io/latest/how_to_guides/annotate/" target="_blank">How to curate data in Argilla</a>
<br>• <a href="https://docs.argilla.io/latest/how_to_guides/import_export/" target="_blank">How to export data once you have reviewed the dataset</a>
</p>
</div>
<p style="margin-top: 1em; font-size: 0.9em; color: #333;">
Unfamiliar with Argilla? Here are some docs to help you get started:
<br>• <a href="https://docs.argilla.io/latest/how_to_guides/annotate/" target="_blank">How to curate data in Argilla</a>
<br>• <a href="https://docs.argilla.io/latest/how_to_guides/import_export/" target="_blank">How to export data once you have reviewed the dataset</a>
</p>
""",
visible=True,
)


def hide_success_message() -> gr.Markdown:
    """Return a Markdown component whose value is the empty string.

    NOTE(review): presumably wired as an event output onto the success
    message component so that its text is cleared -- confirm at the call
    sites.
    """
    cleared = gr.Markdown(value="")
    return cleared
Loading

0 comments on commit fb096d2

Please sign in to comment.