Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions evaluation_function/compareFSA_service.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# dummy file for shimmy tests

import sys
import json
from typing import Any
from .evaluation import evaluation_function
from lf_toolkit.evaluation import Params

def main():
    """Serve evaluation requests from Shimmy over stdin/stdout.

    Reads standard input line by line. Every non-blank line is parsed as a
    JSON request of the form
    ``{"input": {"response": ..., "answer": ..., "params": {...}}}``
    (``params`` is optional and defaults to an empty dict), passed to
    ``evaluation_function``, and answered with exactly one JSON line on
    standard output, flushed immediately:
    ``{"output": {"is_correct": ..., "feedback_items": ...}}``.

    Blank or whitespace-only lines are skipped without producing output, so
    stray newlines do not generate responses that no request asked for.

    Any exception raised while handling a request (malformed JSON, missing
    keys, a failure inside the evaluation, a non-serialisable result) is
    reported as an ``is_correct: False`` response whose single feedback item
    is ``("error", <exception text>)``; the loop then continues with the
    next line.
    """
    for line in sys.stdin:
        # A blank line carries no request; answering it would put an extra
        # response on the wire and misalign the request/response pairing.
        if not line.strip():
            continue

        try:
            # Parse request from Shimmy
            req = json.loads(line)
            payload = req["input"]
            response: Any = payload["response"]
            answer: Any = payload["answer"]
            params_dict: dict = payload.get("params", {})
            params = Params(params_dict)

            # Call the evaluation function
            result = evaluation_function(response, answer, params)

            # Convert LFResult to JSON; serialisation stays inside the try
            # so a non-serialisable result is reported as an error response.
            resp = {
                "output": {
                    "is_correct": result.is_correct,
                    "feedback_items": result.feedback_items,
                }
            }
            print(json.dumps(resp), flush=True)

        except Exception as e:
            # Always return JSON even on error: this is the process
            # boundary, so the broad catch is deliberate.
            resp = {"output": {"is_correct": False, "feedback_items": [("error", str(e))]}}
            print(json.dumps(resp), flush=True)


if __name__ == "__main__":
    main()
74 changes: 40 additions & 34 deletions evaluation_function/evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,47 +5,53 @@
from .schemas.result import Result
from .correction import analyze_fsa_correction



def evaluation_function(
response: Any,
answer: Any,
params: Params,
payload: Any
) -> LFResult:
"""
Evaluate a student's FSA response against the expected answer.
return LFResult(
is_correct=False,
feedback_items=[("error", f"{payload}")]
)

# def evaluation_function(
# response: Any,
# answer: Any,
# params: Params,
# ) -> LFResult:
# """
# Evaluate a student's FSA response against the expected answer.

Args:
response: Student's FSA (dict with states, alphabet, transitions, etc.); due to frontend constraints, this is an FSAFrontend
answer: Expected FSA; also an FSAFrontend, for the same reason
params: Extra parameters (e.g., require_minimal)
# Args:
# response: Student's FSA (dict with states, alphabet, transitions, etc.); due to frontend constraints, this is an FSAFrontend
# answer: Expected FSA; also an FSAFrontend, for the same reason
# params: Extra parameters (e.g., require_minimal)

Returns:
LFResult with is_correct and feedback
"""
try:
# Parse FSAs from input
student_fsa_ = FSAFrontend.model_validate(response)
expected_fsa_ = FSAFrontend.model_validate(answer)
# Returns:
# LFResult with is_correct and feedback
# """
# try:
# # Parse FSAs from input
# student_fsa_ = FSAFrontend.model_validate(response)
# expected_fsa_ = FSAFrontend.model_validate(answer)

student_fsa = student_fsa_.from_flattened()
expected_fsa = expected_fsa_.from_flattened()
# student_fsa = student_fsa_.from_flattened()
# expected_fsa = expected_fsa_.from_flattened()


# Get require_minimal from params if present
require_minimal = params.get("require_minimal", False) if hasattr(params, "get") else False
# # Get require_minimal from params if present
# require_minimal = params.get("require_minimal", False) if hasattr(params, "get") else False

# Run correction pipeline
result: Result = analyze_fsa_correction(student_fsa, expected_fsa, require_minimal)
# # Run correction pipeline
# result: Result = analyze_fsa_correction(student_fsa, expected_fsa, require_minimal)

# Convert to lf_toolkit Result
return LFResult(
is_correct=result.is_correct,
feedback_items=[("feedback", result.feedback)]
)
# # Convert to lf_toolkit Result
# return LFResult(
# is_correct=result.is_correct,
# feedback_items=[("feedback", result.feedback)]
# )

except Exception as e:
return LFResult(
is_correct=False,
feedback_items=[("error", f"Invalid FSA format: {str(e)}, received: \n\nresponse: {response}\n\n answer: {answer}, \n\nparams: {params}")]
)
# except Exception as e:
# return LFResult(
# is_correct=False,
# feedback_items=[("error", f"Invalid FSA format: {str(e)}, received: \n\nresponse: {response}\n\n answer: {answer}, \n\nparams: {params}")]
# )