From 0764946c17d1809c338ff4f6583ae78d07c3757e Mon Sep 17 00:00:00 2001
From: yuetsh <517252939@qq.com>
Date: Wed, 3 Jun 2026 07:19:57 -0600
Subject: [PATCH] add flowchart statistics admin API with word cloud

---
 deploy/requirements.txt  |   3 +
 flowchart/urls/admin.py  |   7 ++
 flowchart/views/admin.py | 158 +++++++++++++++++++++++++++++++++++++++
 oj/urls.py               |   1 +
 pyproject.toml           |   1 +
 uv.lock                  |   8 ++
 6 files changed, 178 insertions(+)
 create mode 100644 flowchart/urls/admin.py
 create mode 100644 flowchart/views/admin.py

diff --git a/deploy/requirements.txt b/deploy/requirements.txt
index c6eb248..f9c2b3b 100644
--- a/deploy/requirements.txt
+++ b/deploy/requirements.txt
@@ -213,6 +213,9 @@ idna==3.13 \
     #   anyio
     #   httpx
     #   requests
+jieba==0.42.1 \
+    --hash=sha256:055ca12f62674fafed09427f176506079bc135638a14e23e25be909131928db2
+    # via onlinejudge
 jiter==0.14.0 \
     --hash=sha256:004df5fdb8ecbd6d99f3227df18ba1a259254c4359736a2e6f036c944e02d7c5 \
     --hash=sha256:14c0cb10337c49f5eafe8e7364daca5e29a020ea03580b8f8e6c597fed4e1588 \
diff --git a/flowchart/urls/admin.py b/flowchart/urls/admin.py
new file mode 100644
index 0000000..7ba6bae
--- /dev/null
+++ b/flowchart/urls/admin.py
@@ -0,0 +1,7 @@
+from django.urls import path
+
+from ..views.admin import FlowchartStatisticsAPI
+
+urlpatterns = [  # included under the "api/admin/" prefix by oj/urls.py
+    path("flowchart/statistics", FlowchartStatisticsAPI.as_view()),
+]
diff --git a/flowchart/views/admin.py b/flowchart/views/admin.py
new file mode 100644
index 0000000..3daed92
--- /dev/null
+++ b/flowchart/views/admin.py
@@ -0,0 +1,158 @@
+import re
+from collections import Counter
+
+import jieba
+from django.db.models import Avg, Count
+
+from account.decorators import teacher_admin_required
+from account.models import AdminType, User
+from problem.models import Problem
+from utils.api import APIView
+
+from ..models import FlowchartSubmission, FlowchartSubmissionStatus
+
+STOPWORDS = frozenset(  # tokens that _build_word_frequencies never counts
+    "的 了 是 在 和 有 就 不 也 都 要 会 这 那 到 说 上 为 与 及 等 "  # 1-char entries are
+    "把 被 从 而 所 但 如 又 或 很 更 还 让 对 已 向 only 能 以 中 可以 "  # also cut by its length >= 2 check
+    "可能 需要 没有 使用 进行 注意 建议 应该 考虑 "
+    "一个 一些 一下 一定 一种 这个 所有 其他 ".split()  # whitespace-separated word list
+)
+
+
+def get_real_name(username, class_name):
+    """Drop the first ``len("ks" + class_name)`` chars of a ``ks``-prefixed username."""
+    has_prefix = bool(class_name) and username.startswith("ks")
+    return username[len(f"ks{class_name}"):] if has_prefix else username
+
+
+class FlowchartStatisticsAPI(APIView):
+    """Statistics over completed flowchart submissions (teacher_admin_required)."""
+
+    @teacher_admin_required
+    def get(self, request):
+        """Return counts, score averages, word frequencies and missing users.
+
+        Query parameters: ``end`` (required) and ``start`` bound ``create_time``;
+        ``problem_id`` and ``username`` (substring match) narrow the submissions.
+        """
+        from django.core.exceptions import ValidationError
+
+        start, end = request.GET.get("start"), request.GET.get("end")
+        if not end:
+            return self.error("end is required")
+
+        filters = {
+            "status": FlowchartSubmissionStatus.COMPLETED,
+            "create_time__lte": end,
+        }
+        if start:
+            filters["create_time__gte"] = start
+        try:
+            # Django converts lookup values eagerly, so a bad timestamp fails here.
+            submissions = FlowchartSubmission.objects.filter(**filters)
+        except ValidationError:
+            return self.error("Invalid start or end time")
+
+        problem_id = request.GET.get("problem_id")
+        if problem_id:
+            try:
+                problem = Problem.objects.get(
+                    _id__iexact=problem_id, contest_id__isnull=True, visible=True
+                )
+            except Problem.DoesNotExist:
+                return self.error("Problem doesn't exist")
+            submissions = submissions.filter(problem=problem)
+
+        username = request.GET.get("username")
+        all_users_dict = {}
+        if username:
+            submissions = submissions.filter(user__username__icontains=username)
+            all_users_dict = dict(
+                User.objects.filter(
+                    username__icontains=username,
+                    is_disabled=False,
+                    admin_type=AdminType.REGULAR_USER,
+                ).values_list("username", "class_name")
+            )
+
+        total_count = submissions.count()
+        if total_count == 0:
+            return self.success({
+                "total_count": 0,
+                "avg_score": 0,
+                "grade_distribution": {},
+                "criteria_averages": {},
+                "person_count": len(all_users_dict),
+                "completed_count": 0,
+                "word_frequencies": [],
+                # Nobody submitted, so every matched user is still outstanding.
+                "data_unaccepted": self._unaccepted_users(all_users_dict, set()),
+            })
+
+        grade_counts = dict(
+            submissions.values_list("ai_grade")
+            .annotate(count=Count("id"))
+            .values_list("ai_grade", "count")
+        )
+        avg_score = submissions.aggregate(avg=Avg("ai_score"))["avg"] or 0
+
+        criteria_totals = Counter()
+        criteria_counts = Counter()
+        criteria_max = {}
+        suggestions_texts = []
+        rows = submissions.values_list("ai_criteria_details", "ai_suggestions")
+        for details, suggestions in rows.iterator():
+            if suggestions:
+                suggestions_texts.append(suggestions)
+            if not isinstance(details, dict):
+                continue
+            for key, val in details.items():
+                score = val.get("score") if isinstance(val, dict) else None
+                # Skip missing or non-numeric scores instead of failing the report.
+                if not isinstance(score, (int, float)):
+                    continue
+                criteria_totals[key] += score
+                criteria_counts[key] += 1
+                criteria_max.setdefault(key, val.get("max", 100))
+
+        criteria_averages = {
+            key: {
+                "avg": round(total / criteria_counts[key], 1),
+                "max": criteria_max.get(key, 100),
+            }
+            for key, total in criteria_totals.items()
+        }
+
+        submitted_users = set(
+            submissions.values_list("user__username", flat=True).distinct()
+        )
+
+        return self.success({
+            "total_count": total_count,
+            "avg_score": round(avg_score, 1),
+            "grade_distribution": grade_counts,
+            "criteria_averages": criteria_averages,
+            "person_count": len(all_users_dict),
+            "completed_count": len(submitted_users),
+            "word_frequencies": self._build_word_frequencies(suggestions_texts),
+            "data_unaccepted": self._unaccepted_users(all_users_dict, submitted_users),
+        })
+
+    @staticmethod
+    def _unaccepted_users(all_users_dict, submitted_users):
+        """List matched users without a completed submission, sorted by username."""
+        return [
+            {"username": name, "real_name": get_real_name(name, all_users_dict[name])}
+            for name in sorted(set(all_users_dict) - submitted_users)
+        ]
+
+    @staticmethod
+    def _build_word_frequencies(texts, top_n=80):
+        """Return the ``top_n`` jieba tokens (2+ chars, non-stopword) by count."""
+        counter = Counter()
+        for text in texts:
+            for token in jieba.cut(re.sub(r"【重点】", "", text)):
+                token = token.strip()
+                if len(token) >= 2 and token not in STOPWORDS:
+                    counter[token] += 1
+        return [{"word": w, "count": c} for w, c in counter.most_common(top_n)]
diff --git a/oj/urls.py b/oj/urls.py
index 52b9153..4c34926 100644
--- a/oj/urls.py
+++ b/oj/urls.py
@@ -21,6 +21,7 @@ urlpatterns = [
     path("api/admin/", include("tutorial.urls.admin")),
     path("api/", include("ai.urls.oj")),
     path("api/", include("flowchart.urls.oj")),
+    path("api/admin/", include("flowchart.urls.admin")),
     path("api/", include("problemset.urls.oj")),
     path("api/admin/", include("problemset.urls.admin")),
     path("api/", include("class_pk.urls.oj")),
diff --git a/pyproject.toml b/pyproject.toml
index 59f2b9b..1eab917 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -29,6 +29,7 @@ dependencies = [
     "tree-sitter-python>=0.25.0",
     "xlsxwriter>=3.2.9,<4",
     "asgiref>=3.11.1",
+    "jieba>=0.42.1",
 ]
 
 [dependency-groups]
diff --git a/uv.lock b/uv.lock
index 5d58ea4..f5ab29f 100644
--- a/uv.lock
+++ b/uv.lock
@@ -349,6 +349,12 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/5d/13/ad7d7ca3808a898b4612b6fe93cde56b53f3034dcde235acb1f0e1df24c6/idna-3.13-py3-none-any.whl", hash = "sha256:892ea0cde124a99ce773decba204c5552b69c3c67ffd5f232eb7696135bc8bb3", size = 68629, upload-time = "2026-04-22T16:42:40.909Z" },
 ]
 
+[[package]]
+name = "jieba"
+version = "0.42.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/c6/cb/18eeb235f833b726522d7ebed54f2278ce28ba9438e3135ab0278d9792a2/jieba-0.42.1.tar.gz", hash = "sha256:055ca12f62674fafed09427f176506079bc135638a14e23e25be909131928db2", size = 19214172, upload-time = "2020-01-20T14:27:23.5Z" }
+
 [[package]]
 name = "jiter"
 version = "0.14.0"
@@ -561,6 +567,7 @@ dependencies = [
     { name = "djangorestframework" },
     { name = "dramatiq" },
     { name = "gunicorn" },
+    { name = "jieba" },
     { name = "openai" },
     { name = "otpauth" },
     { name = "pillow" },
@@ -594,6 +601,7 @@ requires-dist = [
     { name = "djangorestframework", specifier = ">=3.17.1,<4" },
     { name = "dramatiq", specifier = ">=2.1.0,<3" },
     { name = "gunicorn", specifier = ">=26.0.0,<27" },
+    { name = "jieba", specifier = ">=0.42.1" },
     { name = "openai", specifier = ">=2.34.0,<3" },
     { name = "otpauth", specifier = ">=2.2.1,<3" },
     { name = "pillow", specifier = ">=12.2.0,<13" },