Prospective_Python_Search_Engine/test_evaluation.py at main · ConstantlyTrying989/Prospective_Python_Search_Engine · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
"""
Test evaluation metrics.
"""

import sys
import os
# Directory that contains this script. It is placed first on sys.path,
# presumably so the ``src`` package imported below resolves regardless of
# the current working directory.
project_root = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, project_root)

from src.loader import DocumentLoader
from src.search import SearchEngine
from src.evaluation import SearchEvaluator

# Script entry point: loads and indexes the corpus, runs the test queries, and prints the metrics.
def main() -> None:
    """Run the sample evaluation queries and print ranking metrics.

    Loads the corpus from ``data/raw_texts``, indexes it with
    ``SearchEngine``, lists the loaded documents with their indices, and
    then prints Precision@3, Precision@5, Recall@5 and Average Precision
    (as computed by ``SearchEvaluator``) for each hard-coded test query,
    followed by the top results for that query.

    The corpus directory is looked up relative to the current working
    directory first; if it is not there, the copy next to this script is
    used instead so the script also works when started from elsewhere.

    Returns:
        None. Everything is reported on stdout. If loading fails or yields
        no documents, an error line is printed and the function returns
        early without running any query.
    """
    rule = "=" * 70
    top_k = 5    # number of results requested per query and the larger cutoff
    small_k = 3  # the smaller precision cutoff

    print(rule)
    print("EVALUATION METRICS TEST")
    print(rule)
    print()

    # Load and index documents
    print("Loading documents...")
    data_dir = 'data/raw_texts'
    if not os.path.isdir(data_dir):
        # The imports are anchored to the script's directory, so fall back to
        # the same anchor for the data when the CWD-relative path is missing.
        anchored_dir = os.path.join(
            os.path.dirname(os.path.abspath(__file__)), 'data', 'raw_texts'
        )
        if os.path.isdir(anchored_dir):
            data_dir = anchored_dir
    loader = DocumentLoader(data_dir)

    try:
        documents = loader.load_documents()
    except Exception as e:  # top-level boundary: report and stop
        print(f"❌ ERROR: {e}")
        return

    if not documents:
        # Nothing to index or evaluate; metrics over an empty corpus would
        # be meaningless.
        print(f"❌ ERROR: no documents were loaded from {data_dir}")
        return

    print()

    engine = SearchEngine()
    engine.index_documents(documents)

    evaluator = SearchEvaluator()

    # First, let's see what documents we have
    print(rule)
    print("AVAILABLE DOCUMENTS:")
    print(rule)
    for i, doc in enumerate(documents):
        print(f"  {i}: {doc['title']}")
    print()

    # Define test cases with known relevant documents
    # NOTE: You'll need to adjust these indices based on YOUR actual documents!
    # Run the script first to see which index corresponds to which book
    test_cases = [
        {
            'query': 'detective mystery crime',
            'relevant_docs': {0, 1, 2},  # Adjust these based on your actual documents
            'description': 'Detective/Mystery query',
        },
        {
            'query': 'whale ocean sea',
            'relevant_docs': {0, 1},  # Adjust these based on your actual documents
            'description': 'Ocean/Whale query',
        },
        {
            'query': 'vampire blood night',
            'relevant_docs': {0},  # Adjust these based on your actual documents
            'description': 'Vampire query',
        },
    ]

    print(rule)
    print("RUNNING EVALUATION ON TEST QUERIES:")
    print(rule)
    print("\nNOTE: You may need to adjust 'relevant_docs' indices in the code")
    print("      based on which books you have in data/raw_texts/\n")
    print("-" * 70)

    for test in test_cases:
        query = test['query']
        relevant_docs = test['relevant_docs']

        # Search
        results = engine.search(query, top_k=top_k)
        retrieved_docs = [r['doc_index'] for r in results]

        # Calculate metrics
        p_at_small = evaluator.precision_at_k(relevant_docs, retrieved_docs, small_k)
        p_at_top = evaluator.precision_at_k(relevant_docs, retrieved_docs, top_k)
        recall_at_top = evaluator.recall_at_k(relevant_docs, retrieved_docs, top_k)
        avg_prec = evaluator.average_precision(relevant_docs, retrieved_docs)

        print(f"\n{test['description']}: '{query}'")

        # The relevant_docs values are meant to be indices from the document
        # listing printed above; an index outside that listing can never be
        # retrieved and silently lowers recall / average precision.
        unknown = sorted(
            idx for idx in relevant_docs if not 0 <= idx < len(documents)
        )
        if unknown:
            print(f"  ⚠️  relevant_docs indices not in corpus: {unknown}")

        print(f"  Precision@{small_k}: {p_at_small:.3f}")
        print(f"  Precision@{top_k}: {p_at_top:.3f}")
        print(f"  Recall@{top_k}: {recall_at_top:.3f}")
        print(f"  Average Precision: {avg_prec:.3f}")

        print(f"\n  Top {top_k} results:")
        for rank, result in enumerate(results[:top_k], 1):
            marker = "✅" if result['doc_index'] in relevant_docs else "  "
            print(f"    {marker} {rank}. {result['title']} (score: {result['score']:.4f})")

    print("\n" + rule)
    print("METRIC EXPLANATION:")
    print(rule)
    print("  • Precision@K: % of top K results that are relevant")
    print("  • Recall@K: % of all relevant docs found in top K")
    print("  • Average Precision: Quality of ranking (higher = better)")

    print("\n" + rule)
    print("✅ Evaluation completed!")
    print(rule)
    print("\nIMPORTANT: Edit this file to set correct 'relevant_docs' for your corpus!")
    print(rule)


# Run the evaluation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()