rezhajulio
diff --git a/‎README.md‎
Lines changed: 18 additions & 10 deletions b/‎README.md‎
Lines changed: 18 additions & 10 deletions
diff --git a/‎src/bot/config.py‎
Lines changed: 6 additions & 0 deletions b/‎src/bot/config.py‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎src/bot/constants.py‎
Lines changed: 111 additions & 0 deletions b/‎src/bot/constants.py‎
Lines changed: 111 additions & 0 deletions
diff --git a/‎src/bot/database/models.py‎
Lines changed: 29 additions & 0 deletions b/‎src/bot/database/models.py‎
Lines changed: 29 additions & 0 deletions
diff --git a/‎src/bot/database/service.py‎
Lines changed: 130 additions & 0 deletions b/‎src/bot/database/service.py‎
Lines changed: 130 additions & 0 deletions
@@ -135,12 +135,12 @@ uv run pytest -v
 ### Test Coverage
 
 The project maintains comprehensive test coverage:
-- **Coverage**: 100% across all modules (887 statements, 0 missed)
-- **Tests**: 252 total
-- **Pass Rate**: 100% (252/252 passed)
-- **All modules**: 100% coverage including JobQueue scheduler integration and captcha verification
+- **Coverage**: 99% across all modules (1,057 statements, 2 missed)
+- **Tests**: 309 total
+- **Pass Rate**: 100% (309/309 passed)
+- **All modules**: 99% coverage including JobQueue scheduler integration, captcha verification, and anti-spam enforcement
   - Services: `bot_info.py`, `scheduler.py`, `user_checker.py`, `telegram_utils.py`, `captcha_recovery.py`
-  - Handlers: `captcha.py`, `dm.py`, `message.py`, `topic_guard.py`, `verify.py`
+  - Handlers: `anti_spam.py`, `captcha.py`, `dm.py`, `message.py`, `topic_guard.py`, `verify.py`
   - Database: `service.py`, `models.py`
   - Config: `config.py`
   - Constants: `constants.py`
@@ -151,6 +151,7 @@ All modules are fully unit tested with:
 - Database initialization and schema validation
 - Background job testing (JobQueue integration, job configuration, auto-restriction logic)
 - Captcha verification flow (new member handling, callback verification, timeout handling)
+- Anti-spam protection (forwarded messages, URL whitelisting, external replies)
 
 ## Project Structure
 
@@ -163,7 +164,10 @@ PythonID/
 ├── data/
 │   └── bot.db            # SQLite database (auto-created)
 ├── tests/
+│   ├── test_anti_spam.py
 │   ├── test_bot_info.py
+│   ├── test_captcha.py
+│   ├── test_captcha_recovery.py
 │   ├── test_config.py
 │   ├── test_constants.py
 │   ├── test_database.py
@@ -181,17 +185,21 @@ PythonID/
         ├── config.py            # Pydantic settings
         ├── constants.py         # Shared constants
         ├── handlers/
+        │   ├── anti_spam.py     # Anti-spam handler for probation users
+        │   ├── captcha.py       # Captcha verification handler
         │   ├── dm.py            # DM unrestriction handler
         │   ├── message.py       # Group message handler
-        │   └── topic_guard.py   # Warning topic protection
+        │   ├── topic_guard.py   # Warning topic protection
+        │   └── verify.py        # /verify and /unverify command handlers
         ├── database/
         │   ├── models.py        # SQLModel schemas
         │   └── service.py       # Database operations
         └── services/
-            ├── bot_info.py      # Bot info caching
-            ├── scheduler.py     # JobQueue background job
-            ├── telegram_utils.py # Shared telegram utilities
-            └── user_checker.py  # Profile validation
+            ├── bot_info.py           # Bot info caching
+            ├── captcha_recovery.py   # Captcha timeout recovery
+            ├── scheduler.py          # JobQueue background job
+            ├── telegram_utils.py     # Shared telegram utilities
+            └── user_checker.py       # Profile validation
 ```
 
 ## Bot Workflow
 
@@ -57,6 +57,8 @@ class Settings(BaseSettings):
         rules_link: URL to group rules message.
         captcha_enabled: Feature flag to enable/disable captcha verification.
         captcha_timeout: Seconds before auto-ban if user doesn't verify.
+        new_user_probation_hours: Hours new users are on probation (no links/forwards).
+        new_user_violation_threshold: Violations before restricting user.
         logfire_token: Logfire API token (optional, required for production logging).
         logfire_service_name: Service name for Logfire traces.
         logfire_environment: Environment name (production/staging).
@@ -74,6 +76,8 @@ class Settings(BaseSettings):
     rules_link: str = "https://t.me/pythonID/290029/321799"
     captcha_enabled: bool = False
     captcha_timeout_seconds: int = 120
+    new_user_probation_hours: int = 168  # 7 days default
+    new_user_violation_threshold: int = 3  # restrict after this many violations
     logfire_token: str | None = None
     logfire_service_name: str = "pythonid-bot"
     logfire_environment: str = "production"
@@ -101,6 +105,8 @@ def model_post_init(self, __context):
         logger.debug(f"database_path: {self.database_path}")
         logger.debug(f"captcha_enabled: {self.captcha_enabled}")
         logger.debug(f"captcha_timeout_seconds: {self.captcha_timeout_seconds}")
+        logger.debug(f"new_user_probation_hours: {self.new_user_probation_hours}")
+        logger.debug(f"new_user_violation_threshold: {self.new_user_violation_threshold}")
         logger.debug(f"telegram_bot_token: {'***' + self.telegram_bot_token[-4:]}")  # Mask sensitive token
         logger.debug(f"logfire_enabled: {self.logfire_enabled}")
         logger.debug(f"logfire_environment: {self.logfire_environment}")
 
@@ -47,6 +47,24 @@ def format_threshold_display(threshold_minutes: int) -> str:
     return f"{threshold_minutes} menit"
 
 
+def format_hours_display(hours: int) -> str:
+    """
+    Format hours to human-readable Indonesian text.
+
+    Values below 24 are rendered as "Y jam". Values of 24 or more are
+    rendered as "X hari", followed by "Y jam" when the value is not a
+    whole number of days, so leftover hours are not silently dropped
+    (e.g. 36 becomes "1 hari 12 jam" rather than "1 hari").
+
+    Args:
+        hours: Time in hours.
+
+    Returns:
+        Formatted string like "7 hari", "1 hari 12 jam" or "12 jam".
+    """
+    if hours < 24:
+        return f"{hours} jam"
+    days, leftover_hours = divmod(hours, 24)
+    if leftover_hours:
+        return f"{days} hari {leftover_hours} jam"
+    return f"{days} hari"
+
+
 # Message templates used in warning and restriction scenarios
 # Warning mode (default): No restrictions, just warnings
 WARNING_MESSAGE_NO_RESTRICTION = (
@@ -143,3 +161,96 @@ def format_threshold_display(threshold_minutes: int) -> str:
     "📋 User: {user_mention} (ID: {user_id})\n\n"
     "Pilih aksi untuk user ini:"
 )
+
+# Anti-spam probation warning for new users.
+# Format placeholders: {user_mention}, {probation_display}, {rules_link}.
+# The last line uses Markdown-style link syntax ([text](url)).
+NEW_USER_SPAM_WARNING = (
+    "⚠️ {user_mention} baru bergabung dan sedang dalam masa percobaan.\n"
+    "Selama {probation_display}, kamu tidak boleh meneruskan pesan atau mengirim tautan.\n"
+    "Pesan yang melanggar akan dihapus dan kamu bisa dibatasi jika terus mengulang.\n"
+    "Hubungi admin jika kamu membutuhkan bantuan.\n\n"
+    "📖 [Baca aturan grup]({rules_link})"
+)
+
+# Anti-spam restriction message when user exceeds violation threshold.
+# Format placeholders: {user_mention}, {violation_count}, {rules_link}.
+# The last line uses Markdown-style link syntax ([text](url)).
+NEW_USER_SPAM_RESTRICTION = (
+    "🚫 {user_mention} telah dibatasi karena mengirim pesan terlarang "
+    "(forward/link/quote eksternal) sebanyak {violation_count} kali selama masa percobaan.\n\n"
+    "📖 [Baca aturan grup]({rules_link})"
+)
+
+# Whitelisted URL domains for new user probation
+# These domains are allowed even during probation period
+# Matches exact domain or subdomains (e.g., "github.com" matches "www.github.com")
+# NOTE(review): if subdomain matching works as described above, entries such as
+# "docs.sqlalchemy.org", "gist.github.com" and "nbviewer.jupyter.org" are already
+# covered by their parent domains in this set; harmless but redundant.
+WHITELISTED_URL_DOMAINS = frozenset([
+    # Documentation & References
+    "docs.python.org",
+    "docs.djangoproject.com",
+    "flask.palletsprojects.com",
+    "fastapi.tiangolo.com",
+    "pydantic-docs.helpmanual.io",
+    "pydantic.dev",
+    "sqlalchemy.org",
+    "docs.sqlalchemy.org",
+    "pandas.pydata.org",
+    "numpy.org",
+    "scipy.org",
+    "matplotlib.org",
+    "scikit-learn.org",
+    "pytorch.org",
+    "tensorflow.org",
+    "keras.io",
+    "huggingface.co",
+    "openai.com",
+    "anthropic.com",
+    "langchain.com",
+    "docs.aws.amazon.com",
+    "cloud.google.com",
+    "docs.microsoft.com",
+    "learn.microsoft.com",
+    
+    # Code Hosting & Collaboration
+    "github.com",
+    "gitlab.com",
+    "bitbucket.org",
+    "gist.github.com",
+    "raw.githubusercontent.com",
+    
+    # Package Repositories
+    "pypi.org",
+    "anaconda.org",
+    "conda.io",
+    "hub.docker.com",
+    
+    # Community & Learning
+    "stackoverflow.com",
+    "stackexchange.com",
+    "reddit.com",
+    "medium.com",
+    "towardsdatascience.com",
+    "dev.to",
+    "realpython.com",
+    "pythonweekly.com",
+    "kaggle.com",
+    "colab.research.google.com",
+    
+    # Data Science & ML Resources
+    "arxiv.org",
+    "paperswithcode.com",
+    "wandb.ai",
+    "mlflow.org",
+    "streamlit.io",
+    "gradio.app",
+    "jupyter.org",
+    "nbviewer.jupyter.org",
+    
+    # API Documentation
+    "developers.google.com",
+    "developer.twitter.com",
+    "developer.github.com",
+    "api.telegram.org",
+    "core.telegram.org",
+    
+    # Telegram links & Indonesian Tech Communities
+    # NOTE(review): "t.me" is Telegram's generic link domain, so whitelisting it
+    # presumably lets any Telegram channel/invite link through during probation -
+    # confirm this is intended.
+    "t.me",
+    "dicoding.com",
+])
@@ -102,3 +102,32 @@ class PendingCaptchaValidation(SQLModel, table=True):
     message_id: int
     user_full_name: str
     created_at: datetime = Field(default_factory=lambda: datetime.now(UTC))
+
+
+class NewUserProbation(SQLModel, table=True):
+    """
+    Tracks anti-spam probation for new users.
+
+    Users under probation cannot send links or forwarded messages
+    for a configurable period after joining. Violations are tracked
+    and users are restricted after exceeding the threshold.
+
+    Stored in the "new_user_probation" table.
+
+    Attributes:
+        id: Primary key (auto-generated).
+        user_id: Telegram user ID (indexed for fast lookups).
+        group_id: Telegram group ID where probation applies (indexed).
+        joined_at: Timestamp when probation started; defaults to the
+            current UTC time when the record is created.
+        violation_count: Number of spam violations (forward/link messages),
+            starting at 0.
+        first_violation_at: Timestamp of first violation (for warnings),
+            or None if there has been no violation yet.
+        last_violation_at: Timestamp of most recent violation, or None
+            if there has been no violation yet.
+    """
+
+    __tablename__ = "new_user_probation"
+
+    id: int | None = Field(default=None, primary_key=True)
+    # NOTE(review): user_id and group_id are indexed separately and no
+    # uniqueness constraint on the (user_id, group_id) pair is declared
+    # here - confirm whether duplicates per pair are meant to be impossible.
+    user_id: int = Field(index=True)
+    group_id: int = Field(index=True)
+    joined_at: datetime = Field(default_factory=lambda: datetime.now(UTC))
+    violation_count: int = Field(default=0)
+    first_violation_at: datetime | None = Field(default=None)
+    last_violation_at: datetime | None = Field(default=None)
@@ -14,6 +14,7 @@
 from sqlmodel import Session, SQLModel, create_engine, delete, select
 
 from bot.database.models import (
+    NewUserProbation,
     PendingCaptchaValidation,
     PhotoVerificationWhitelist,
     UserWarning,
@@ -462,6 +463,135 @@ def get_all_pending_captchas(self) -> list[PendingCaptchaValidation]:
             statement = select(PendingCaptchaValidation)
             return list(session.exec(statement).all())
 
+    def start_new_user_probation(self, user_id: int, group_id: int) -> NewUserProbation:
+        """
+        Begin probation for a user, or restart it if a record already exists.
+
+        Looks up the record for (user_id, group_id). When one is found,
+        joined_at is set to the current UTC time and the violation counter
+        and both violation timestamps are reset; otherwise a fresh record
+        is inserted.
+
+        Args:
+            user_id: Telegram user ID.
+            group_id: Telegram group ID.
+
+        Returns:
+            NewUserProbation: Created or updated probation record.
+        """
+        lookup = select(NewUserProbation).where(
+            NewUserProbation.user_id == user_id,
+            NewUserProbation.group_id == group_id,
+        )
+        with Session(self._engine) as session:
+            probation = session.exec(lookup).first()
+
+            if not probation:
+                probation = NewUserProbation(user_id=user_id, group_id=group_id)
+            else:
+                # Existing record: restart the clock and wipe prior violations.
+                probation.joined_at = datetime.now(UTC)
+                probation.violation_count = 0
+                probation.first_violation_at = None
+                probation.last_violation_at = None
+
+            session.add(probation)
+            session.commit()
+            # Reload so the returned object carries the committed values.
+            session.refresh(probation)
+            logger.info(f"Started probation for user_id={user_id}, group_id={group_id}")
+            return probation
+
+    def get_new_user_probation(
+        self, user_id: int, group_id: int
+    ) -> NewUserProbation | None:
+        """
+        Fetch the probation record for a user in a group.
+
+        Args:
+            user_id: Telegram user ID.
+            group_id: Telegram group ID.
+
+        Returns:
+            NewUserProbation | None: The first matching record, or None
+            when the user has no probation record in that group.
+        """
+        query = select(NewUserProbation).where(
+            NewUserProbation.user_id == user_id,
+            NewUserProbation.group_id == group_id,
+        )
+        with Session(self._engine) as session:
+            probation = session.exec(query).first()
+            return probation
+
+    def increment_new_user_violation(
+        self, user_id: int, group_id: int
+    ) -> NewUserProbation:
+        """
+        Increment violation count for a user on probation atomically.
+
+        Both the counter and first_violation_at are computed inside the SQL
+        UPDATE itself (violation_count + 1 and
+        COALESCE(first_violation_at, now)), so simultaneous violations
+        neither lose an increment nor overwrite a first-violation timestamp
+        that another writer has already stored.
+
+        Args:
+            user_id: Telegram user ID.
+            group_id: Telegram group ID.
+
+        Returns:
+            NewUserProbation: Updated probation record.
+
+        Raises:
+            ValueError: If no probation record exists.
+        """
+        from sqlalchemy import func
+        from sqlalchemy import update as sql_update
+
+        with Session(self._engine) as session:
+            # Look the row up first so a missing record raises instead of
+            # silently updating nothing, and so it can be refreshed below.
+            select_stmt = select(NewUserProbation).where(
+                NewUserProbation.user_id == user_id,
+                NewUserProbation.group_id == group_id,
+            )
+            record = session.exec(select_stmt).first()
+
+            if not record:
+                raise ValueError(f"No probation record for user {user_id} in group {group_id}")
+
+            now = datetime.now(UTC)
+
+            # Atomic update - every derived value is evaluated by the
+            # database against the row's current state, not against the
+            # possibly stale copy read above.
+            update_stmt = (
+                sql_update(NewUserProbation)
+                .where(NewUserProbation.id == record.id)
+                .values(
+                    violation_count=NewUserProbation.violation_count + 1,
+                    first_violation_at=func.coalesce(
+                        NewUserProbation.first_violation_at, now
+                    ),
+                    last_violation_at=now,
+                )
+            )
+            session.exec(update_stmt)
+            session.commit()
+
+            # Refresh to get updated values
+            session.refresh(record)
+            logger.info(
+                f"Incremented violation for user_id={user_id}, group_id={group_id}, "
+                f"count={record.violation_count}"
+            )
+            return record
+
+    def clear_new_user_probation(self, user_id: int, group_id: int) -> None:
+        """
+        Delete the probation record for a user in a group.
+
+        Issues a DELETE for rows matching both IDs and commits it.
+
+        Args:
+            user_id: Telegram user ID.
+            group_id: Telegram group ID.
+        """
+        removal = delete(NewUserProbation).where(
+            NewUserProbation.user_id == user_id,
+            NewUserProbation.group_id == group_id,
+        )
+        with Session(self._engine) as session:
+            session.exec(removal)
+            session.commit()
+            logger.info(f"Cleared probation for user_id={user_id}, group_id={group_id}")
+
 
 # Module-level singleton for database service
 _db_service: DatabaseService | None = None