-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest_evaluation.py
More file actions
114 lines (94 loc) · 3.63 KB
/
Copy pathtest_evaluation.py
File metadata and controls
114 lines (94 loc) · 3.63 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
"""
Test evaluation metrics.
"""
import sys
import os
project_root = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, project_root)
from src.loader import DocumentLoader
from src.search import SearchEngine
from src.evaluation import SearchEvaluator
# Test cases (queries paired with known relevant documents) are defined inside main().
def _print_section(title, leading_blank=False):
    """Print *title* framed above and below by a 70-character '=' rule.

    Args:
        title: Text printed between the two rules.
        leading_blank: When true, a newline is emitted before the first rule.
    """
    rule = "=" * 70
    print(("\n" + rule) if leading_blank else rule)
    print(title)
    print(rule)


def _resolve_data_dir(relative_dir='data/raw_texts'):
    """Return the corpus path to hand to DocumentLoader.

    The path is used as-is when it exists relative to the current working
    directory. Otherwise the same path is tried relative to the directory
    containing this script, so the script also works when launched from
    another directory. If neither exists, the original relative path is
    returned unchanged so the loader reports the failure itself.
    """
    if os.path.exists(relative_dir):
        return relative_dir
    script_dir = os.path.dirname(os.path.abspath(__file__))
    candidate = os.path.join(script_dir, *relative_dir.split('/'))
    return candidate if os.path.exists(candidate) else relative_dir


def _report_query(engine, evaluator, test):
    """Run one test case's query and print its metrics and top results.

    Args:
        engine: Object whose ``search(query, top_k=...)`` returns a sequence of
            results indexable by 'doc_index', 'title' and 'score'.
        evaluator: Object providing ``precision_at_k``, ``recall_at_k`` and
            ``average_precision``.
        test: Dict with 'query', 'relevant_docs' and 'description' keys.
    """
    query = test['query']
    relevant_docs = test['relevant_docs']

    # Search
    results = engine.search(query, top_k=5)
    retrieved_docs = [r['doc_index'] for r in results]

    # Calculate metrics
    p_at_3 = evaluator.precision_at_k(relevant_docs, retrieved_docs, 3)
    p_at_5 = evaluator.precision_at_k(relevant_docs, retrieved_docs, 5)
    recall_at_5 = evaluator.recall_at_k(relevant_docs, retrieved_docs, 5)
    avg_prec = evaluator.average_precision(relevant_docs, retrieved_docs)

    print(f"\n{test['description']}: '{query}'")
    print(f" Precision@3: {p_at_3:.3f}")
    print(f" Precision@5: {p_at_5:.3f}")
    print(f" Recall@5: {recall_at_5:.3f}")
    print(f" Average Precision: {avg_prec:.3f}")
    print("\n Top 5 results:")
    for i, result in enumerate(results[:5], 1):
        # Flag the hits that belong to the hand-labelled relevant set.
        marker = "✅" if result['doc_index'] in relevant_docs else " "
        print(f" {marker} {i}. {result['title']} (score: {result['score']:.4f})")


def main():
    """Index the corpus, run sample queries and print ranking metrics.

    Loads documents with DocumentLoader, indexes them with SearchEngine, lists
    the loaded titles with their positions, then for each hard-coded test case
    prints Precision@3, Precision@5, Recall@5 and Average Precision computed
    by SearchEvaluator, followed by the top results.

    Returns early (after printing an error line) when loading raises or when
    no documents were loaded.
    """
    _print_section("EVALUATION METRICS TEST")
    print()

    # Load and index documents
    print("Loading documents...")
    loader = DocumentLoader(_resolve_data_dir())
    try:
        documents = loader.load_documents()
    except Exception as e:  # script boundary: report the failure and stop
        print(f"❌ ERROR: {e}")
        return
    if not documents:
        # Every metric below would be meaningless against an empty corpus.
        print("❌ ERROR: no documents were loaded; nothing to evaluate")
        return
    print()

    engine = SearchEngine()
    engine.index_documents(documents)
    evaluator = SearchEvaluator()

    # First, let's see what documents we have
    _print_section("AVAILABLE DOCUMENTS:")
    for i, doc in enumerate(documents):
        print(f" {i}: {doc['title']}")
    print()

    # Define test cases with known relevant documents
    # NOTE: You'll need to adjust these indices based on YOUR actual documents!
    # Run the script first to see which index corresponds to which book
    # NOTE(review): assumes a result's 'doc_index' is the position printed in
    # the listing above -- confirm against SearchEngine.
    test_cases = [
        {
            'query': 'detective mystery crime',
            'relevant_docs': {0, 1, 2},  # Adjust these based on your actual documents
            'description': 'Detective/Mystery query',
        },
        {
            'query': 'whale ocean sea',
            'relevant_docs': {0, 1},  # Adjust these based on your actual documents
            'description': 'Ocean/Whale query',
        },
        {
            'query': 'vampire blood night',
            'relevant_docs': {0},  # Adjust these based on your actual documents
            'description': 'Vampire query',
        },
    ]

    _print_section("RUNNING EVALUATION ON TEST QUERIES:")
    print("\nNOTE: You may need to adjust 'relevant_docs' indices in the code")
    print(" based on which books you have in data/raw_texts/\n")
    print("-" * 70)

    for test in test_cases:
        _report_query(engine, evaluator, test)

    _print_section("METRIC EXPLANATION:", leading_blank=True)
    print(" • Precision@K: % of top K results that are relevant")
    print(" • Recall@K: % of all relevant docs found in top K")
    print(" • Average Precision: Quality of ranking (higher = better)")

    _print_section("✅ Evaluation completed!", leading_blank=True)
    print("\nIMPORTANT: Edit this file to set correct 'relevant_docs' for your corpus!")
    print("=" * 70)
# Run the evaluation walkthrough only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    main()