jupyter: add includeSection (#1337)

MykolaGolubyev · web-flow · commit 28c4d6623d70 · 2025-08-25T22:24:22.000-04:00
diff --git a/znai-docs/znai/release-notes/1.78/add-2025-08-25-jupyter-include-section.md b/znai-docs/znai/release-notes/1.78/add-2025-08-25-jupyter-include-section.md
@@ -0,0 +1 @@
+* Add: [Jupyter](snippets/jupyter-notebook) `includeSection`
diff --git a/znai-docs/znai/snippets/jupyter-notebook.md b/znai-docs/znai/snippets/jupyter-notebook.md
@@ -20,7 +20,18 @@ become part of Table Of Contents and part of a search unit.
 
 Note: below text is auto generated, including the **Panda** section 
 :include-jupyter: src/test/resources/notebook-with-markdown-story.ipynb
-     
+
+# Include Only Specified Sections
+
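+Use `includeSection` to select which sections of the notebook to include, listed by their heading titles. Set `excludeSectionTitle` to `true` to leave the matched headings out of the result.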
+
+```
+:include-jupyter: notebook.ipynb {
+    includeSection: ["Section One", "Section Three"],
+    excludeSectionTitle: true
+}
+```
+
 # Two Sides
 
 You will learn about the [Two Sides Layout](layout/two-sides-pages) in the Layout section. 
diff --git a/znai-jupyter/pom.xml b/znai-jupyter/pom.xml
@@ -55,4 +55,21 @@
         </dependency>
     </dependencies>
 
+    <build>
+        <plugins>
+            <plugin>
+                <groupId>org.codehaus.gmavenplus</groupId>
+                <artifactId>gmavenplus-plugin</artifactId>
+                <executions>
+                    <execution>
+                        <goals>
+                            <goal>addTestSources</goal>
+                            <goal>compileTests</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
+        </plugins>
+    </build>
+
 </project>
diff --git a/znai-jupyter/src/main/java/org/testingisdocumenting/znai/jupyter/JupyterCellFilter.java b/znai-jupyter/src/main/java/org/testingisdocumenting/znai/jupyter/JupyterCellFilter.java
@@ -0,0 +1,89 @@
+package org.testingisdocumenting.znai.jupyter;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.stream.Collectors;
+
+/** Selects the cells of a Jupyter notebook that belong to one markdown section. */
+public class JupyterCellFilter {
+    /** Same as {@link #fromSection(List, String, boolean)} with the section title kept. */
+    public static List<JupyterCell> fromSection(List<JupyterCell> cells, String sectionName) {
+        return fromSection(cells, sectionName, false);
+    }
+
+    /**
+     * Returns the cells starting at the markdown cell whose first line is a heading titled
+     * {@code sectionName}, up to but excluding the next markdown cell that starts with a heading.
+     *
+     * @param cells notebook cells in document order
+     * @param sectionName heading text to match, without the leading {@code #} markers
+     * @param excludeSectionTitle when true, the matched heading line is removed from the first cell
+     * @return the section's cells, or an empty list when no heading matches
+     */
+    public static List<JupyterCell> fromSection(List<JupyterCell> cells, String sectionName, boolean excludeSectionTitle) {
+        String titleRegex = "^#+\\s+" + java.util.regex.Pattern.quote(sectionName);
+
+        int startIdx = -1;
+        for (int i = 0; i < cells.size(); i++) {
+            String heading = firstMarkdownLine(cells.get(i));
+            if (heading != null && heading.matches(titleRegex)) {
+                startIdx = i;
+                break;
+            }
+        }
+
+        if (startIdx == -1) {
+            return new ArrayList<>();
+        }
+
+        int endIdx = cells.size();
+        for (int i = startIdx + 1; i < cells.size(); i++) {
+            String heading = firstMarkdownLine(cells.get(i));
+            // test the first line only: "." does not match a newline, so matching the whole
+            // input would miss a heading cell that also carries body text
+            if (heading != null && heading.matches("^#+ .*")) {
+                endIdx = i;
+                break;
+            }
+        }
+
+        List<JupyterCell> sectionCells = new ArrayList<>(cells.subList(startIdx, endIdx));
+        return excludeSectionTitle ? excludeSectionHeader(sectionCells) : sectionCells;
+    }
+
+    /** Returns the first line of a markdown cell's trimmed input, or null for any other cell. */
+    private static String firstMarkdownLine(JupyterCell cell) {
+        if (!cell.getType().equals(JupyterCell.MARKDOWN_TYPE) || cell.getInput() == null) {
+            return null;
+        }
+
+        String input = cell.getInput().trim();
+        int newlineIdx = input.indexOf('\n');
+        return newlineIdx == -1 ? input : input.substring(0, newlineIdx);
+    }
+
+    /** Drops every line up to and including the first one that starts with {@code #}. */
+    private static String stripFirstSectionHeader(String content) {
+        String[] lines = content.split("\\n");
+        int firstMatchIdx = 0;
+        while (firstMatchIdx < lines.length && !lines[firstMatchIdx].strip().startsWith("#")) {
+            firstMatchIdx++;
+        }
+
+        return Arrays.stream(lines).skip(firstMatchIdx + 1).collect(Collectors.joining("\n")).trim();
+    }
+
+    /** Removes the heading line from a leading markdown cell; the cell is dropped if nothing remains. */
+    private static List<JupyterCell> excludeSectionHeader(List<JupyterCell> cells) {
+        if (cells.isEmpty()) {
+            return cells;
+        }
+
+        List<JupyterCell> result = new ArrayList<>();
+        JupyterCell firstCell = cells.get(0);
+        if (firstCell.getType().equals(JupyterCell.MARKDOWN_TYPE) && firstCell.getInput() != null) {
+            String stripped = stripFirstSectionHeader(firstCell.getInput());
+            if (!stripped.isEmpty()) {
+                result.add(new JupyterCell(firstCell.getType(), stripped, firstCell.getOutputs()));
+            }
+        } else {
+            result.add(firstCell);
+        }
+
+        result.addAll(cells.subList(1, cells.size()));
+        return result;
+    }
+}
diff --git a/znai-jupyter/src/main/java/org/testingisdocumenting/znai/jupyter/JupyterIncludePlugin.java b/znai-jupyter/src/main/java/org/testingisdocumenting/znai/jupyter/JupyterIncludePlugin.java
@@ -20,21 +20,21 @@
 import org.testingisdocumenting.znai.codesnippets.CodeSnippetsProps;
 import org.testingisdocumenting.znai.core.AuxiliaryFile;
 import org.testingisdocumenting.znai.core.ComponentsRegistry;
+import org.testingisdocumenting.znai.extensions.*;
 import org.testingisdocumenting.znai.resources.ResourcesResolver;
-import org.testingisdocumenting.znai.extensions.PluginParams;
-import org.testingisdocumenting.znai.extensions.PluginResult;
 import org.testingisdocumenting.znai.extensions.include.IncludePlugin;
 import org.testingisdocumenting.znai.parser.ParserHandler;
 import org.testingisdocumenting.znai.parser.commonmark.MarkdownParser;
 import org.testingisdocumenting.znai.utils.JsonUtils;
 
 import java.nio.file.Path;
-import java.util.Collections;
-import java.util.LinkedHashMap;
-import java.util.Map;
+import java.util.*;
 import java.util.stream.Stream;
 
 public class JupyterIncludePlugin implements IncludePlugin {
+    private static final String STORY_FIRST_KEY = "storyFirst";
+    private static final String INCLUDE_SECTION_KEY = "includeSection";
+    private static final String EXCLUDE_SECTION_TITLE_KEY = "excludeSectionTitle";
     private MarkdownParser markdownParser;
     private Path path;
     private String lang;
@@ -51,12 +51,24 @@ public IncludePlugin create() {
         return new JupyterIncludePlugin();
     }
 
+    /**
+     * Declares the options this include plugin accepts: {@code storyFirst} (boolean),
+     * {@code includeSection} (a single string or a list of strings) and
+     * {@code excludeSectionTitle} (boolean).
+     * NOTE(review): the last argument of each {@code add} call looks like an example value — confirm.
+     *
+     * @return definition holding the three parameters above
+     */
+    @Override
+    public PluginParamsDefinition parameters() {
+        PluginParamsDefinition params = new PluginParamsDefinition();
+        params.add(STORY_FIRST_KEY, PluginParamType.BOOLEAN, "put output cells first, before input", "true");
+        params.add(INCLUDE_SECTION_KEY, PluginParamType.LIST_OR_SINGLE_STRING, "only include specified section by title", "Example of Data setup");
+        params.add(EXCLUDE_SECTION_TITLE_KEY, PluginParamType.BOOLEAN, "when include section key is used, excludes the matched title", "true");
+
+        return params;
+    }
+
     @Override
     public PluginResult process(ComponentsRegistry componentsRegistry, ParserHandler parserHandler, Path markupPath, PluginParams pluginParams) {
         markdownParser = componentsRegistry.markdownParser();
         markdownParserHandler = parserHandler;
 
-        isStoryFirst = pluginParams.getOpts().get("storyFirst", false);
+        isStoryFirst = pluginParams.getOpts().get(STORY_FIRST_KEY, false);
+        List<String> includeSection = pluginParams.getOpts().getList(INCLUDE_SECTION_KEY);
+        Boolean excludeSectionTitle = pluginParams.getOpts().get(EXCLUDE_SECTION_TITLE_KEY, false);
 
         ResourcesResolver resourcesResolver = componentsRegistry.resourceResolver();
         path = resourcesResolver.fullPath(pluginParams.getFreeParam());
@@ -65,7 +77,12 @@ public PluginResult process(ComponentsRegistry componentsRegistry, ParserHandler
                 .parse(JsonUtils.deserializeAsMap(resourcesResolver.textContent(path)));
         lang = notebook.getLang();
 
-        notebook.getCells().forEach(this::processCell);
+        List<JupyterCell> cells =
+                !includeSection.isEmpty() ?
+                        collectCells(notebook.getCells(), includeSection, excludeSectionTitle) :
+                        notebook.getCells();
+
+        cells.forEach(this::processCell);
         return PluginResult.docElements(Stream.empty());
     }
 
@@ -74,6 +91,19 @@ public Stream<AuxiliaryFile> auxiliaryFiles(ComponentsRegistry componentsRegistr
         return Stream.of(AuxiliaryFile.builtTime(path));
     }
 
+    /**
+     * Gathers the cells of every requested section, concatenated in the order the sections are listed.
+     *
+     * @param cells all cells of the notebook
+     * @param includeSections section titles to look up, one {@code JupyterCellFilter.fromSection} call each
+     * @param excludeSectionTitle passed through to the filter for every section
+     * @return cells of all requested sections
+     * @throws RuntimeException when the filter returns no cells for one of the sections
+     */
+    private List<JupyterCell> collectCells(List<JupyterCell> cells, List<String> includeSections, Boolean excludeSectionTitle) {
+        List<JupyterCell> result = new ArrayList<>();
+        for (String includeSection : includeSections) {
+            List<JupyterCell> filtered = JupyterCellFilter.fromSection(cells, includeSection, excludeSectionTitle);
+            // an empty result is treated as a configuration error rather than silently rendering nothing
+            if (filtered.isEmpty()) {
+                throw new RuntimeException("No cells found for include section: \"" + includeSection + "\"");
+            }
+            result.addAll(filtered);
+        }
+
+        return result;
+    }
+
     private void processCell(JupyterCell cell) {
         processMarkdownCell(cell);
 
@@ -154,12 +184,10 @@ private boolean isMarkdown(JupyterCell cell) {
     }
 
     private Map<String, Object> convertInputData(JupyterCell cell) {
-        switch (cell.getType()) {
-            case JupyterCell.CODE_TYPE:
-                return CodeSnippetsProps.create(lang, cell.getInput());
-            default:
-                return Collections.singletonMap(JupyterOutput.TEXT_FORMAT, cell.getInput());
+        if (cell.getType().equals(JupyterCell.CODE_TYPE)) {
+            return CodeSnippetsProps.create(lang, cell.getInput());
         }
+        return Collections.singletonMap(JupyterOutput.TEXT_FORMAT, cell.getInput());
     }
 
     private Map<String, Object> convertOutputData(JupyterOutput output) {
diff --git a/znai-jupyter/src/main/java/org/testingisdocumenting/znai/jupyter/JupyterParserVer4.java b/znai-jupyter/src/main/java/org/testingisdocumenting/znai/jupyter/JupyterParserVer4.java
@@ -1,4 +1,5 @@
 /*
+ * Copyright 2025 znai maintainers
  * Copyright 2019 TWO SIGMA OPEN SOURCE, LLC
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
@@ -25,7 +26,6 @@
 import static java.util.stream.Collectors.toList;
 
 public class JupyterParserVer4 implements JupyterParser {
-
     public JupyterParserVer4() {
     }
 
@@ -43,14 +43,12 @@ private JupyterCell parseCell(Object o) {
 
         String type = cellContent.get("cell_type").toString();
 
-        switch (type) {
-            case CODE_TYPE:
-                return parseCodeCell(cellContent);
-            case MARKDOWN_TYPE:
-                return parseMarkdownCell(cellContent);
-        }
+        return switch (type) {
+            case CODE_TYPE -> parseCodeCell(cellContent);
+            case MARKDOWN_TYPE -> parseMarkdownCell(cellContent);
+            default -> new JupyterCell("unknown", "", Collections.emptyList());
+        };
 
-        return new JupyterCell("unknown", "", Collections.emptyList());
     }
 
     private JupyterCell parseMarkdownCell(Map<String, ?> cellContent) {
@@ -70,12 +68,10 @@ private JupyterCell parseCodeCell(Map<String, ?> cellContent) {
     @SuppressWarnings("unchecked")
     private JupyterOutput parseOutput(Map<String, ?> outputContent) {
         String type = outputContent.get("output_type").toString();
-        switch (type) {
-            case "stream":
-                return new JupyterOutput(JupyterOutput.TEXT_FORMAT, joinLines(outputContent.get("text")));
-            default:
-                return parseOutputData((Map<String, ?>) outputContent.get("data"));
+        if (type.equals("stream")) {
+            return new JupyterOutput(JupyterOutput.TEXT_FORMAT, joinLines(outputContent.get("text")));
         }
+        return parseOutputData((Map<String, ?>) outputContent.get("data"));
     }
 
     private JupyterOutput parseOutputData(Map<String, ?> data) {
diff --git a/znai-jupyter/src/test/groovy/org/testingisdocumenting/znai/jupyter/JupyterCellFilterTest.groovy b/znai-jupyter/src/test/groovy/org/testingisdocumenting/znai/jupyter/JupyterCellFilterTest.groovy

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	+* Add: [Jupyter](snippets/jupyter-notebook) `includeSection`