cortexlinux · pavanimanchala53 · Nov 30, 2025 · Dec 11, 2025 · Dec 13, 2025 · Dec 13, 2025
diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml
@@ -0,0 +1,99 @@
+# For most projects, this workflow file will not need changing; you simply need
+# to commit it to your repository.
+#
+# You may wish to alter this file to override the set of languages analyzed,
+# or to provide custom queries or build logic.
+#
+# ******** NOTE ********
+# We have attempted to detect the languages in your repository. Please check
+# the `language` matrix defined below to confirm you have the correct set of
+# supported CodeQL languages.
+#
+name: "CodeQL Advanced"
+
+# Scan every push and pull request that targets main, plus one scheduled run per week.
+on:
+  push:
+    branches: [ "main" ]
+  pull_request:
+    branches: [ "main" ]
+  schedule:
+    # Cron fields: minute 44, hour 23, any day of month, any month, day-of-week 6 (Saturday).
+    # GitHub evaluates workflow schedules in UTC.
+    - cron: '44 23 * * 6'
+
+jobs:
+  analyze:
+    name: Analyze (${{ matrix.language }})
+    # Runner size impacts CodeQL analysis time. To learn more, please see:
+    #   - https://gh.io/recommended-hardware-resources-for-running-codeql
+    #   - https://gh.io/supported-runners-and-hardware-resources
+    #   - https://gh.io/using-larger-runners (GitHub.com only)
+    # Consider using larger runners or machines with greater resources for possible analysis time improvements.
+    # Swift entries run on macOS; every other language runs on Ubuntu.
+    runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }}
+    permissions:
+      # required for all workflows
+      security-events: write
+
+      # required to fetch internal or private CodeQL packs
+      packages: read
+
+      # only required for workflows in private repositories
+      actions: read
+      contents: read
+
+    strategy:
+      # Let the remaining matrix entries finish even if one of them fails.
+      fail-fast: false
+      matrix:
+        include:
+        # Python is the only language analyzed; build-mode none means no build step is run for it.
+        - language: python
+          build-mode: none
+        # CodeQL supports the following keywords for 'language': 'actions', 'c-cpp', 'csharp', 'go', 'java-kotlin', 'javascript-typescript', 'python', 'ruby', 'rust', 'swift'
+        # Use `c-cpp` to analyze code written in C, C++ or both
+        # Use 'java-kotlin' to analyze code written in Java, Kotlin or both
+        # Use 'javascript-typescript' to analyze code written in JavaScript, TypeScript or both
+        # To learn more about changing the languages that are analyzed or customizing the build mode for your analysis,
+        # see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/customizing-your-advanced-setup-for-code-scanning.
+        # If you are analyzing a compiled language, you can modify the 'build-mode' for that language to customize how
+        # your codebase is analyzed, see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/codeql-code-scanning-for-compiled-languages
+    steps:
+    - name: Checkout repository
+      uses: actions/checkout@v4
+
+    # Add any setup steps before running the `github/codeql-action/init` action.
+    # This includes steps like installing compilers or runtimes (`actions/setup-node`
+    # or others). This is typically only required for manual builds.
+    # - name: Setup runtime (example)
+    #   uses: actions/setup-example@v1
+
+    # Initializes the CodeQL tools for scanning.
+    - name: Initialize CodeQL
+      uses: github/codeql-action/init@v4
+      with:
+        # Both values come from the current matrix entry defined under `strategy` above.
+        languages: ${{ matrix.language }}
+        build-mode: ${{ matrix.build-mode }}
+        # If you wish to specify custom queries, you can do so here or in a config file.
+        # By default, queries listed here will override any specified in a config file.
+        # Prefix the list here with "+" to use these queries and those in the config file.
+
+        # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs
+        # queries: security-extended,security-and-quality
+
+    # If the analyze step fails for one of the languages you are analyzing with
+    # "We were unable to automatically build your code", modify the matrix above
+    # to set the build mode to "manual" for that language. Then modify this step
+    # to build your code.
+    # ℹ️ Command-line programs to run using the OS shell.
+    # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun
+    # Placeholder: it only runs for matrix entries whose build-mode is 'manual' (none at present)
+    # and exits with status 1 after printing instructions, so it must be replaced with real
+    # build commands before a manual-build language is added.
+    - name: Run manual build steps
+      if: matrix.build-mode == 'manual'
+      shell: bash
+      run: |
+        echo 'If you are using a "manual" build mode for one or more of the' \
+          'languages you are analyzing, replace this with the commands to build' \
+          'your code, for example:'
+        echo '  make bootstrap'
+        echo '  make release'
+        exit 1
+
+    - name: Perform CodeQL Analysis
+      uses: github/codeql-action/analyze@v4
+      with:
+        # One results category per analyzed language.
+        category: "/language:${{matrix.language}}"
diff --git a/nl_parser.py b/nl_parser.py
@@ -0,0 +1,222 @@
+import difflib
+import re
+from difflib import SequenceMatcher
+from typing import Dict, Any, List, Tuple
+
+# Known words that a misspelled token may be snapped to during typo correction.
+VOCAB = {
+    # Python tooling
+    "python",
+    "pip",
+    "venv",
+    "virtualenv",
+    "conda",
+    "anaconda",
+    # Containers and orchestration
+    "docker",
+    "kubernetes",
+    "k8s",
+    "kubectl",
+    # Web servers
+    "nginx",
+    "apache",
+    "httpd",
+    "web",
+    "server",
+    # Frameworks and ML libraries
+    "flask",
+    "django",
+    "tensorflow",
+    "pytorch",
+    "torch",
+    # Generic request words
+    "install",
+    "setup",
+    "development",
+    "env",
+    "environment",
+}
+
+# Reference phrases per intent, used for fuzzy whole-phrase matching.
+# Order matters: the earliest best-scoring example decides ties.
+INTENT_EXAMPLES = {
+    # Machine-learning libraries
+    "install_ml": [
+        "install something for machine learning",
+        "install pytorch",
+        "install tensorflow",
+        "i want to run pytorch",
+    ],
+    # Web servers
+    "install_web_server": [
+        "i need a web server",
+        "install nginx",
+        "install apache",
+        "set up a web server",
+    ],
+    # Python development environments
+    "setup_python_env": [
+        "set up python development environment",
+        "install python 3.10",
+        "create python venv",
+        "setup dev env",
+    ],
+    # Docker on its own
+    "install_docker": ["install docker", "add docker", "deploy containers - docker"],
+    # Docker together with Kubernetes
+    "install_docker_k8s": [
+        "install docker and kubernetes",
+        "docker and k8s",
+        "k8s and docker on my mac",
+    ],
+}
+
+
+def normalize(text: str) -> str:
+    """Lower-case ``text`` and reduce it to space-separated ASCII words.
+
+    Every character other than ``a-z``, ``0-9``, ``.`` and whitespace
+    (hyphens included) becomes a separator. A dot is kept only when it sits
+    between two digits, so version numbers such as ``3.10`` survive while
+    sentence punctuation (``"install nginx."``) no longer sticks to the
+    preceding word, where it would make a known word look misspelled.
+
+    Args:
+        text: Raw user input.
+
+    Returns:
+        The cleaned text with single spaces and no leading/trailing space.
+    """
+    text = text.lower()
+    # Hyphens and all other disallowed characters turn into spaces.
+    text = re.sub(r"[^a-z0-9.\s]", " ", text)
+    # A dot not flanked by digits on both sides is punctuation, not a version separator.
+    text = re.sub(r"(?<![0-9])\.|\.(?![0-9])", " ", text)
+    return re.sub(r"\s+", " ", text).strip()
+
+
+def tokenize(text: str) -> List[str]:
+    """Split ``text`` on runs of whitespace and return the resulting words."""
+    words = text.split()
+    return words
+
+
+def spell_correct_token(token: str) -> Tuple[str, bool]:
+    """Snap ``token`` to the closest VOCAB word when it looks like a typo.
+
+    Returns:
+        ``(word, changed)``. ``changed`` is True only when ``token`` is not
+        in VOCAB and a sufficiently similar VOCAB word was substituted;
+        otherwise the token is returned untouched with False.
+    """
+    if token not in VOCAB:
+        # Consider only the single best candidate with similarity >= 0.75.
+        candidates = difflib.get_close_matches(token, VOCAB, n=1, cutoff=0.75)
+        if candidates:
+            return candidates[0], True
+    return token, False
+
+
+def apply_spell_correction(tokens: List[str]) -> Tuple[List[str], List[Tuple[str, str]]]:
+    """Spell-correct every token and report which ones were changed.
+
+    Returns:
+        ``(corrected_tokens, corrections)`` where ``corrections`` lists an
+        ``(original, replacement)`` pair for each token that was altered,
+        in input order.
+    """
+    # One (original, replacement, changed) triple per input token.
+    outcomes = [(tok, *spell_correct_token(tok)) for tok in tokens]
+    fixed_tokens = [word for _, word, _ in outcomes]
+    changes = [(tok, word) for tok, word, changed in outcomes if changed]
+    return fixed_tokens, changes
+
+
+def fuzzy_phrase_score(a: str, b: str) -> float:
+    """Return the difflib similarity ratio of ``a`` and ``b`` (0.0 to 1.0)."""
+    matcher = SequenceMatcher(None, a, b)
+    similarity = matcher.ratio()
+    return similarity
+
+
+def semantic_intent_score(text: str) -> Tuple[str, float]:
+    """Find the intent whose example phrase is most similar to ``text``.
+
+    Returns:
+        ``(intent, score)`` for the best-scoring example, or
+        ``("unknown", 0.0)`` when no example scores above zero.
+    """
+    scored = (
+        (fuzzy_phrase_score(text, sample), label)
+        for label, samples in INTENT_EXAMPLES.items()
+        for sample in samples
+    )
+    # max() returns the first of equally scored candidates, so the earliest example wins ties.
+    top_score, top_label = max(
+        scored, key=lambda pair: pair[0], default=(0.0, "unknown")
+    )
+    if top_score > 0.0:
+        return top_label, top_score
+    return "unknown", 0.0
+
+
+def rule_intent(text: str) -> Tuple[str, float]:
+    """Detect the intent of ``text`` from keywords.
+
+    Checks run in priority order: docker (with or without Kubernetes),
+    Kubernetes alone, web servers, Python environments, then ML libraries.
+    Keywords are matched as substrings, except ``ml``, which must be a
+    standalone word.
+
+    Args:
+        text: Normalized (lower-case) request text.
+
+    Returns:
+        ``(intent, confidence)``; ``("unknown", 0.0)`` when nothing matches.
+    """
+    mentions_k8s = any(word in text for word in ("kubernetes", "k8s", "kubectl"))
+
+    if "docker" in text:
+        if mentions_k8s:
+            return "install_docker_k8s", 0.95
+        return "install_docker", 0.9
+
+    if mentions_k8s:
+        return "install_docker_k8s", 0.9
+
+    if any(word in text for word in ("nginx", "apache", "httpd", "web server")):
+        return "install_web_server", 0.9
+
+    if any(word in text for word in ("python", "venv", "conda", "anaconda")):
+        return "setup_python_env", 0.9
+
+    ml_keywords = ("tensorflow", "pytorch", "torch", "machine learning")
+    # "ml" is matched on word boundaries: as a bare substring it would also
+    # fire on unrelated words such as "html", "yaml" or "xml".
+    if any(word in text for word in ml_keywords) or re.search(r"\bml\b", text):
+        return "install_ml", 0.9
+
+    return "unknown", 0.0
+
+
+# "python" followed by a dotted version with any number of components
+# ("3", "3.10", "3.10.2"); the version is capture group 1.
+VERSION_RE = re.compile(r"python\s*([0-9]+(?:\.[0-9]+)*)")
+# Operating-system / distribution names, matched as whole words.
+PLATFORM_RE = re.compile(r"\b(mac|macos|windows|linux|ubuntu|debian)\b")
+# Package names, matched as whole words.
+PACKAGE_RE = re.compile(r"\b(nginx|apache|docker|kubernetes|k8s|kubectl|python|pip|venv|conda|tensorflow|pytorch)\b")
+
+
+def extract_slots(text: str) -> Dict[str, Any]:
+    """Pull structured details out of ``text``.
+
+    Returns:
+        A dict that may contain ``python_version`` (str), ``platform`` (str)
+        and ``packages`` (list of distinct names in order of first
+        appearance). Keys with no match are omitted.
+    """
+    found: Dict[str, Any] = {}
+
+    version_match = VERSION_RE.search(text)
+    if version_match is not None:
+        found["python_version"] = version_match.group(1)
+
+    platform_match = PLATFORM_RE.search(text)
+    if platform_match is not None:
+        found["platform"] = platform_match.group(1)
+
+    # Drop repeated package names while keeping first-seen order.
+    distinct = []
+    for name in PACKAGE_RE.findall(text):
+        if name not in distinct:
+            distinct.append(name)
+    if distinct:
+        found["packages"] = distinct
+
+    return found
+
+
+def aggregate_confidence(c_rule, c_sem, num_corrections, c_classifier=0.0):
+    """Blend the individual confidences into one score in [0.0, 1.0].
+
+    The rule and semantic scores weigh 0.4 each and the classifier score 0.2.
+    The blend is then scaled down by 10% per spelling correction (never
+    below zero), clamped to [0, 1] and rounded to two decimals.
+    """
+    weighted = 0.4 * c_rule + 0.4 * c_sem + 0.2 * c_classifier
+    # Each correction removes a tenth of the score; ten or more leave nothing.
+    damping = max(0.0, 1 - (num_corrections * 0.1))
+    clamped = max(0.0, min(1.0, weighted * damping))
+    return round(clamped, 2)
+
+
+def decide_clarifications(intent, confidence):
+    """Return follow-up options to offer the user, or an empty list.
+
+    An unknown intent or a confidence below 0.6 yields the general menu.
+    A Python-environment intent with confidence below 0.75 yields the
+    Python-specific choices. Anything else needs no clarification.
+    """
+    needs_general_menu = intent == "unknown" or confidence < 0.6
+    if needs_general_menu:
+        return [
+            "Install Docker and Kubernetes",
+            "Set up Python development environment",
+            "Install a web server (nginx/apache)",
+            "Install ML libraries (tensorflow/pytorch)",
+        ]
+
+    python_env_unsure = intent == "setup_python_env" and confidence < 0.75
+    if python_env_unsure:
+        return ["Use venv", "Use conda", "Install a specific Python version"]
+    return []
+
+
+def parse_request(text: str) -> Dict[str, Any]:
+    """Parse a free-form request; main entry point used by tests and demo.
+
+    Pipeline: normalize -> tokenize -> spell-correct -> rule-based and
+    semantic intent detection -> slot extraction -> confidence aggregation
+    -> clarification options.
+
+    Returns:
+        A dict with the keys ``intent``, ``confidence``, ``explanation``,
+        ``slots``, ``corrections`` and ``clarifications``.
+    """
+    raw_tokens = tokenize(normalize(text))
+    fixed_tokens, corrections = apply_spell_correction(raw_tokens)
+    cleaned = " ".join(fixed_tokens)
+
+    rule_int, c_rule = rule_intent(cleaned)
+    sem_int, c_sem = semantic_intent_score(cleaned)
+
+    # A rule hit always decides the intent; the 0.95 classifier term is only
+    # granted when the semantic match agrees with it. Without a rule hit the
+    # semantic match is trusted only above 0.6.
+    agreement_bonus = 0.0
+    if rule_int != "unknown":
+        intent = rule_int
+        if sem_int == rule_int:
+            agreement_bonus = 0.95
+    elif c_sem > 0.6:
+        intent = sem_int
+    else:
+        intent = "unknown"
+
+    confidence = aggregate_confidence(
+        c_rule, c_sem, len(corrections), agreement_bonus
+    )
+
+    notes = []
+    if corrections:
+        fixes = ", ".join(f"{a}->{b}" for a, b in corrections)
+        notes.append("corrected: " + fixes)
+    notes.append(f"rule_intent={rule_int} ({c_rule:.2f})")
+    notes.append(f"semantic_match={sem_int} ({c_sem:.2f})")
+
+    return {
+        "intent": intent,
+        "confidence": confidence,
+        "explanation": "; ".join(notes),
+        "slots": extract_slots(cleaned),
+        "corrections": corrections,
+        "clarifications": decide_clarifications(intent, confidence),
+    }
+
+
@@ -0,0 +1,37 @@
+import pytest
+from nl_parser import parse_request
+
+@pytest.mark.parametrize("text,expected", [
+    ("install something for machine learning", "install_ml"),
+    ("I need a web server", "install_web_server"),
+    ("set up python development environment", "setup_python_env"),
+    ("install docker and kubernets", "install_docker_k8s"),
+    ("Can you provision a python env with pip, venv and flake8?", "setup_python_env"),
+    ("need nginx or apache for a website", "install_web_server"),
+    ("deploy containers - docker", "install_docker"),
+    ("k8s and docker on my mac", "install_docker_k8s"),
+    ("i want to run pytorch", "install_ml"),
+    ("setup dev env", "ambiguous"),
+    ("add docker", "install_docker"),
+    ("pls install pyhton 3.10", "setup_python_env"),
+])
+def test_intent(text, expected):
+    """Each request maps to its expected intent with confidence >= 0.5.
+
+    The pseudo-intent "ambiguous" instead requires clarification options.
+    """
+    outcome = parse_request(text)
+    detected, score = outcome["intent"], outcome["confidence"]
+
+    if expected != "ambiguous":
+        assert detected == expected
+        assert score >= 0.5
+    else:
+        assert outcome["clarifications"], f"Expected clarifications for: {text}"
+
+def test_corrections():
+    """A misspelled "kubernets" is reported as corrected and the intent still resolves."""
+    parsed = parse_request("install docker and kubernets")
+    assert parsed["intent"] == "install_docker_k8s"
+    misspelled = [before for before, _after in parsed["corrections"]]
+    assert "kubernets" in misspelled
+
+def test_slot_extraction():
+    """The Python version and the platform are extracted into slots."""
+    slots = parse_request("pls install python 3.10 on mac")["slots"]
+    assert slots.get("python_version") == "3.10"
+    assert slots.get("platform") in ("mac", "macos")