Skip to content

Commit 28c4d66

Browse files
jupyter: add includeSection (#1337)
1 parent 3c2003e commit 28c4d66

File tree

7 files changed

+344
-26
lines changed

7 files changed

+344
-26
lines changed
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
* Add: [Jupyter](snippets/jupyter-notebook) `includeSection`

znai-docs/znai/snippets/jupyter-notebook.md

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,18 @@ become part of Table Of Contents and part of a search unit.
2020

2121
Note: the text below is auto-generated, including the **Panda** section
2222
:include-jupyter: src/test/resources/notebook-with-markdown-story.ipynb
23-
23+
24+
# Include Only Specified Sections
25+
26+
Use `includeSection` to select which sections of the notebook to include. Set `excludeSectionTitle` to `true` to leave out the matched section title.
27+
28+
```
29+
:include-jupyter: notebook.ipynb {
30+
includeSection: ["Section One", "Section Three"],
31+
excludeSectionTitle: true
32+
}
33+
```
34+
2435
# Two Sides
2536

2637
You will learn about the [Two Sides Layout](layout/two-sides-pages) in the Layout section.

znai-jupyter/pom.xml

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,4 +55,21 @@
5555
</dependency>
5656
</dependencies>
5757

58+
<build>
59+
<plugins>
60+
<plugin>
61+
<groupId>org.codehaus.gmavenplus</groupId>
62+
<artifactId>gmavenplus-plugin</artifactId>
63+
<executions>
64+
<execution>
65+
<goals>
66+
<goal>addTestSources</goal>
67+
<goal>compileTests</goal>
68+
</goals>
69+
</execution>
70+
</executions>
71+
</plugin>
72+
</plugins>
73+
</build>
74+
5875
</project>
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
package org.testingisdocumenting.znai.jupyter;
2+
3+
import java.util.ArrayList;
4+
import java.util.Arrays;
5+
import java.util.List;
6+
import java.util.stream.Collectors;
7+
8+
public class JupyterCellFilter {
9+
public static List<JupyterCell> fromSection(List<JupyterCell> cells, String sectionName) {
10+
return fromSection(cells, sectionName, false);
11+
}
12+
13+
public static List<JupyterCell> fromSection(List<JupyterCell> cells, String sectionName, boolean excludeSectionTitle) {
14+
int startIdx = -1;
15+
int endIdx = cells.size();
16+
17+
for (int i = 0; i < cells.size(); i++) {
18+
JupyterCell cell = cells.get(i);
19+
if (cell.getType().equals(JupyterCell.MARKDOWN_TYPE) &&
20+
cell.getInput() != null) {
21+
String input = cell.getInput().trim();
22+
String firstLine = input.contains("\n") ? input.substring(0, input.indexOf("\n")) : input;
23+
if (firstLine.matches("^#+\\s+" + java.util.regex.Pattern.quote(sectionName))) {
24+
startIdx = i;
25+
break;
26+
}
27+
}
28+
}
29+
30+
if (startIdx == -1) {
31+
return new ArrayList<>();
32+
}
33+
34+
for (int i = startIdx + 1; i < cells.size(); i++) {
35+
JupyterCell cell = cells.get(i);
36+
if (cell.getType().equals(JupyterCell.MARKDOWN_TYPE) &&
37+
cell.getInput() != null &&
38+
cell.getInput().trim().matches("^#+ .*")) {
39+
endIdx = i;
40+
break;
41+
}
42+
}
43+
44+
List<JupyterCell> sectionCells = new ArrayList<>();
45+
for (int i = startIdx; i < endIdx; i++) {
46+
sectionCells.add(cells.get(i));
47+
}
48+
49+
return excludeSectionTitle ?
50+
excludeSectionHeader(sectionCells):
51+
sectionCells;
52+
}
53+
54+
private static String stripFirstSectionHeader(String content) {
55+
String[] lines = content.split("\\n");
56+
int firstMatchIdx = 0;
57+
for (String line : lines) {
58+
if (line.strip().startsWith("#")) {
59+
break;
60+
}
61+
62+
firstMatchIdx++;
63+
}
64+
65+
return Arrays.stream(lines).skip(firstMatchIdx + 1).collect(Collectors.joining("\n")).trim();
66+
}
67+
68+
private static List<JupyterCell> excludeSectionHeader(List<JupyterCell> cells) {
69+
if (cells.isEmpty()) {
70+
return cells;
71+
}
72+
73+
List<JupyterCell> result = new ArrayList<>();
74+
JupyterCell firstCell = cells.get(0);
75+
76+
if (firstCell.getType().equals(JupyterCell.MARKDOWN_TYPE) && firstCell.getInput() != null) {
77+
String markdown = firstCell.getInput();
78+
String stripped = stripFirstSectionHeader(markdown);
79+
if (!stripped.isEmpty()) {
80+
result.add(new JupyterCell(firstCell.getType(), stripped, firstCell.getOutputs()));
81+
}
82+
} else {
83+
result.add(firstCell);
84+
}
85+
86+
result.addAll(cells.subList(1, cells.size()));
87+
return result;
88+
}
89+
}

znai-jupyter/src/main/java/org/testingisdocumenting/znai/jupyter/JupyterIncludePlugin.java

Lines changed: 40 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -20,21 +20,21 @@
2020
import org.testingisdocumenting.znai.codesnippets.CodeSnippetsProps;
2121
import org.testingisdocumenting.znai.core.AuxiliaryFile;
2222
import org.testingisdocumenting.znai.core.ComponentsRegistry;
23+
import org.testingisdocumenting.znai.extensions.*;
2324
import org.testingisdocumenting.znai.resources.ResourcesResolver;
24-
import org.testingisdocumenting.znai.extensions.PluginParams;
25-
import org.testingisdocumenting.znai.extensions.PluginResult;
2625
import org.testingisdocumenting.znai.extensions.include.IncludePlugin;
2726
import org.testingisdocumenting.znai.parser.ParserHandler;
2827
import org.testingisdocumenting.znai.parser.commonmark.MarkdownParser;
2928
import org.testingisdocumenting.znai.utils.JsonUtils;
3029

3130
import java.nio.file.Path;
32-
import java.util.Collections;
33-
import java.util.LinkedHashMap;
34-
import java.util.Map;
31+
import java.util.*;
3532
import java.util.stream.Stream;
3633

3734
public class JupyterIncludePlugin implements IncludePlugin {
35+
private static final String STORY_FIRST_KEY = "storyFirst";
36+
private static final String INCLUDE_SECTION_KEY = "includeSection";
37+
private static final String EXCLUDE_SECTION_TITLE_KEY = "excludeSectionTitle";
3838
private MarkdownParser markdownParser;
3939
private Path path;
4040
private String lang;
@@ -51,12 +51,24 @@ public IncludePlugin create() {
5151
return new JupyterIncludePlugin();
5252
}
5353

54+
@Override
55+
public PluginParamsDefinition parameters() {
56+
PluginParamsDefinition params = new PluginParamsDefinition();
57+
params.add(STORY_FIRST_KEY, PluginParamType.BOOLEAN, "put output cells first, before input", "true");
58+
params.add(INCLUDE_SECTION_KEY, PluginParamType.LIST_OR_SINGLE_STRING, "only include specified section by title", "Example of Data setup");
59+
params.add(EXCLUDE_SECTION_TITLE_KEY, PluginParamType.BOOLEAN, "when include section key is used, excludes the matched title", "true");
60+
61+
return params;
62+
}
63+
5464
@Override
5565
public PluginResult process(ComponentsRegistry componentsRegistry, ParserHandler parserHandler, Path markupPath, PluginParams pluginParams) {
5666
markdownParser = componentsRegistry.markdownParser();
5767
markdownParserHandler = parserHandler;
5868

59-
isStoryFirst = pluginParams.getOpts().get("storyFirst", false);
69+
isStoryFirst = pluginParams.getOpts().get(STORY_FIRST_KEY, false);
70+
List<String> includeSection = pluginParams.getOpts().getList(INCLUDE_SECTION_KEY);
71+
Boolean excludeSectionTitle = pluginParams.getOpts().get(EXCLUDE_SECTION_TITLE_KEY, false);
6072

6173
ResourcesResolver resourcesResolver = componentsRegistry.resourceResolver();
6274
path = resourcesResolver.fullPath(pluginParams.getFreeParam());
@@ -65,7 +77,12 @@ public PluginResult process(ComponentsRegistry componentsRegistry, ParserHandler
6577
.parse(JsonUtils.deserializeAsMap(resourcesResolver.textContent(path)));
6678
lang = notebook.getLang();
6779

68-
notebook.getCells().forEach(this::processCell);
80+
List<JupyterCell> cells =
81+
!includeSection.isEmpty() ?
82+
collectCells(notebook.getCells(), includeSection, excludeSectionTitle) :
83+
notebook.getCells();
84+
85+
cells.forEach(this::processCell);
6986
return PluginResult.docElements(Stream.empty());
7087
}
7188

@@ -74,6 +91,19 @@ public Stream<AuxiliaryFile> auxiliaryFiles(ComponentsRegistry componentsRegistr
7491
return Stream.of(AuxiliaryFile.builtTime(path));
7592
}
7693

94+
private List<JupyterCell> collectCells(List<JupyterCell> cells, List<String> includeSections, Boolean excludeSectionTitle) {
95+
List<JupyterCell> result = new ArrayList<>();
96+
for (String includeSection : includeSections) {
97+
List<JupyterCell> filtered = JupyterCellFilter.fromSection(cells, includeSection, excludeSectionTitle);
98+
if (filtered.isEmpty()) {
99+
throw new RuntimeException("No cells found for include section: \"" + includeSection + "\"");
100+
}
101+
result.addAll(filtered);
102+
}
103+
104+
return result;
105+
}
106+
77107
private void processCell(JupyterCell cell) {
78108
processMarkdownCell(cell);
79109

@@ -154,12 +184,10 @@ private boolean isMarkdown(JupyterCell cell) {
154184
}
155185

156186
private Map<String, Object> convertInputData(JupyterCell cell) {
157-
switch (cell.getType()) {
158-
case JupyterCell.CODE_TYPE:
159-
return CodeSnippetsProps.create(lang, cell.getInput());
160-
default:
161-
return Collections.singletonMap(JupyterOutput.TEXT_FORMAT, cell.getInput());
187+
if (cell.getType().equals(JupyterCell.CODE_TYPE)) {
188+
return CodeSnippetsProps.create(lang, cell.getInput());
162189
}
190+
return Collections.singletonMap(JupyterOutput.TEXT_FORMAT, cell.getInput());
163191
}
164192

165193
private Map<String, Object> convertOutputData(JupyterOutput output) {

znai-jupyter/src/main/java/org/testingisdocumenting/znai/jupyter/JupyterParserVer4.java

Lines changed: 9 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
/*
2+
* Copyright 2025 znai maintainers
23
* Copyright 2019 TWO SIGMA OPEN SOURCE, LLC
34
*
45
* Licensed under the Apache License, Version 2.0 (the "License");
@@ -25,7 +26,6 @@
2526
import static java.util.stream.Collectors.toList;
2627

2728
public class JupyterParserVer4 implements JupyterParser {
28-
2929
public JupyterParserVer4() {
3030
}
3131

@@ -43,14 +43,12 @@ private JupyterCell parseCell(Object o) {
4343

4444
String type = cellContent.get("cell_type").toString();
4545

46-
switch (type) {
47-
case CODE_TYPE:
48-
return parseCodeCell(cellContent);
49-
case MARKDOWN_TYPE:
50-
return parseMarkdownCell(cellContent);
51-
}
46+
return switch (type) {
47+
case CODE_TYPE -> parseCodeCell(cellContent);
48+
case MARKDOWN_TYPE -> parseMarkdownCell(cellContent);
49+
default -> new JupyterCell("unknown", "", Collections.emptyList());
50+
};
5251

53-
return new JupyterCell("unknown", "", Collections.emptyList());
5452
}
5553

5654
private JupyterCell parseMarkdownCell(Map<String, ?> cellContent) {
@@ -70,12 +68,10 @@ private JupyterCell parseCodeCell(Map<String, ?> cellContent) {
7068
@SuppressWarnings("unchecked")
7169
private JupyterOutput parseOutput(Map<String, ?> outputContent) {
7270
String type = outputContent.get("output_type").toString();
73-
switch (type) {
74-
case "stream":
75-
return new JupyterOutput(JupyterOutput.TEXT_FORMAT, joinLines(outputContent.get("text")));
76-
default:
77-
return parseOutputData((Map<String, ?>) outputContent.get("data"));
71+
if (type.equals("stream")) {
72+
return new JupyterOutput(JupyterOutput.TEXT_FORMAT, joinLines(outputContent.get("text")));
7873
}
74+
return parseOutputData((Map<String, ?>) outputContent.get("data"));
7975
}
8076

8177
private JupyterOutput parseOutputData(Map<String, ?> data) {

0 commit comments

Comments
 (0)