Skip to content

Commit 4e9b197

Browse files
committed
Tighten new comments across SCIP shard code
1 parent 9726d83 commit 4e9b197

26 files changed

Lines changed: 81 additions & 271 deletions

File tree

build.sbt

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -519,8 +519,7 @@ lazy val semanticdbKotlincMinimized = project
519519
val snapDir =
520520
(baseDirectory.value / "src" / "generatedSnapshots" / "resources")
521521
.getAbsolutePath
522-
// Place the aggregated `index.scip` outside the shard-scanned
523-
// targetroot so a subsequent run doesn't re-ingest it as a shard.
522+
// Write `index.scip` outside the shard-scanned targetroot to avoid re-ingestion.
524523
val scipOut = (target.value / "scip-index" / "index.scip")
525524
.getAbsolutePath
526525
val mainCls = "com.sourcegraph.scip_java.ScipJava"

scip-java/src/main/scala/com/sourcegraph/scip_java/commands/IndexSemanticdbCommand.scala

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -61,12 +61,8 @@ final case class IndexSemanticdbCommand(
6161
)
6262
allowExportingGlobalSymbolsFromDirectoryEntries: Boolean = true,
6363
@Description(
64-
"If true, walk targetroots for *.scip shards (META-INF/scip/...) emitted by the " +
65-
"compiler plug-ins instead of *.semanticdb files. The aggregator rewrites placeholder " +
66-
"symbols into the final 'scip-java' scheme and merges per-source shards into the " +
67-
"output index. Defaults to true now that both the javac and kotlinc plug-ins emit " +
68-
"shards. Pass --use-scip-shards=false to fall back to the legacy SemanticDB-based " +
69-
"aggregator."
64+
"If true, aggregate *.scip shards under META-INF/scip/ instead of *.semanticdb files. " +
65+
"Pass --use-scip-shards=false to fall back to the SemanticDB-based aggregator."
7066
)
7167
useScipShards: Boolean = true,
7268
@Inline()

scip-java/src/main/scala/com/sourcegraph/scip_java/commands/SnapshotCommand.scala

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -52,9 +52,7 @@ case class SnapshotCommand(
5252
): FileVisitResult = {
5353
if (scipPattern.matches(file)) {
5454
val index = Index.parseFrom(Files.readAllBytes(file))
55-
// Skip per-source shards emitted by the compiler plugin (those don't have a
56-
// project_root). The aggregator produces a single top-level index file that
57-
// carries the project_root and is the canonical input for snapshot rendering.
55+
// Skip per-source shards (no project_root); only the aggregator output carries it.
5856
val rawProjectRoot = index.getMetadata.getProjectRoot
5957
if (rawProjectRoot.nonEmpty) {
6058
foundScipFile = true

scip-semanticdb/src/main/java/com/sourcegraph/scip_semanticdb/ScipShardAggregator.java

Lines changed: 9 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -27,19 +27,9 @@
2727
import java.util.jar.JarFile;
2828

2929
/**
30-
* Aggregates per-source {@code *.scip} shards into a single {@link Index}.
31-
*
32-
* <p>Steps:
33-
*
34-
* <ol>
35-
* <li>Walk all targetroots for {@code *.scip} shards (and {@code *.jar} files containing them).
36-
* <li>Parse each shard into a {@link Index} containing exactly one {@link Document}.
37-
* <li>Rewrite placeholder global symbols ({@code ". . . . "} prefix) into the final {@code
38-
* "scip-java maven g a v ..."} form via {@link SymbolRewriter}.
39-
* <li>Deduplicate documents by {@code relative_path}, merging occurrences/symbols.
40-
* <li>Compute inverse {@code is_implementation+is_reference} relationships across the project.
41-
* <li>Emit the metadata block and all documents through {@link ScipWriter}.
42-
* </ol>
30+
* Aggregates per-source {@code *.scip} shards into a single {@link Index}: walks targetroots for
31+
* shards, rewrites placeholder symbols via {@link SymbolRewriter}, deduplicates documents by {@code
32+
* relative_path}, attaches inverse relationships, and emits everything through {@link ScipWriter}.
4333
*/
4434
public final class ScipShardAggregator {
4535

@@ -80,8 +70,7 @@ private void run() throws IOException {
8070
options.reporter.startProcessing(shards.size());
8171
writer.emitTyped(metadata());
8272

83-
// Deduplicate documents by relative_path. We rewrite symbols as we go so all merging happens
84-
// on final-form strings.
73+
// Rewrite symbols first so the dedup key is the final form.
8574
LinkedHashMap<String, Document.Builder> merged = new LinkedHashMap<>();
8675
InverseReferenceRelationships inverse = new InverseReferenceRelationships();
8776

@@ -114,8 +103,6 @@ private void run() throws IOException {
114103
options.reporter.endProcessing();
115104
}
116105

117-
// --- shard parsing -------------------------------------------------------
118-
119106
private static final PathMatcher JAR_PATTERN =
120107
FileSystems.getDefault().getPathMatcher("glob:**.jar");
121108

@@ -145,8 +132,6 @@ private static Index parseBytes(byte[] bytes) throws IOException {
145132
return Index.parseFrom(in);
146133
}
147134

148-
// --- rewriting -----------------------------------------------------------
149-
150135
private Document rewriteDocument(Document doc) {
151136
Document.Builder builder = doc.toBuilder().clearOccurrences().clearSymbols();
152137
for (Occurrence occ : doc.getOccurrencesList()) {
@@ -173,15 +158,14 @@ private SymbolInformation rewriteSymbol(SymbolInformation info) {
173158
}
174159

175160
private void mergeInto(Document.Builder target, Document fresh) {
176-
// Deduplicate occurrences by (range, symbol, roles) so that shards produced by repeated
177-
// compilation rounds (or by multiple targetroots) don't introduce duplicate entries.
178-
// Variants that differ only by enclosing_range are collapsed, preferring the one that has it.
161+
// Dedup occurrences by (range, symbol, roles); collapse variants differing only in
162+
// enclosing_range, preferring the one that has it.
179163
LinkedHashMap<OccurrenceKey, Occurrence> occurrences = new LinkedHashMap<>();
180164
for (Occurrence occ : target.getOccurrencesList()) putOccurrence(occurrences, occ);
181165
for (Occurrence occ : fresh.getOccurrencesList()) putOccurrence(occurrences, occ);
182166
target.clearOccurrences().addAllOccurrences(occurrences.values());
183167

184-
// Deduplicate symbols by symbol string; merge relationships of duplicates.
168+
// Dedup symbols by symbol string; merge relationships of duplicates.
185169
LinkedHashMap<String, SymbolInformation> bySymbol = new LinkedHashMap<>();
186170
for (SymbolInformation info : target.getSymbolsList()) {
187171
bySymbol.put(info.getSymbol(), info);
@@ -242,8 +226,6 @@ private static SymbolInformation mergeSymbol(SymbolInformation a, SymbolInformat
242226
return builder.build();
243227
}
244228

245-
// --- metadata ------------------------------------------------------------
246-
247229
private Index metadata() {
248230
return Index.newBuilder()
249231
.setMetadata(
@@ -259,12 +241,9 @@ private Index metadata() {
259241
.build();
260242
}
261243

262-
// --- inverse relationships ----------------------------------------------
263-
264244
/**
265-
* Collects symbols that override or implement other symbols, indexed by the overridden symbol.
266-
* When a final document is emitted, the per-symbol entries are augmented with {@code
267-
* is_implementation && is_reference} relationships pointing back to overriders.
245+
* Collects overriders keyed by the overridden symbol, then augments each final document with
246+
* {@code is_implementation && is_reference} relationships pointing back at the overriders.
268247
*/
269248
private final class InverseReferenceRelationships {
270249

scip-semanticdb/src/main/java/com/sourcegraph/scip_semanticdb/ScipShardWalker.java

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,9 @@
1212
import java.util.List;
1313

1414
/**
15-
* A file visitor that recursively collects per-source SCIP shard files ({@code *.scip}) emitted by
16-
* the compiler plug-ins. Only files under a {@code META-INF/scip/} directory are returned so we
17-
* don't accidentally re-ingest a previously-written aggregate {@code index.scip} that may live in
18-
* the same target tree.
15+
* Collects per-source SCIP shards ({@code *.scip}) emitted by the compiler plug-ins. Restricted to
16+
* {@code META-INF/scip/} so a previously-written aggregate {@code index.scip} in the same target
17+
* tree is not re-ingested.
1918
*/
2019
public class ScipShardWalker extends SimpleFileVisitor<Path> {
2120
private final ArrayList<Path> result;

scip-semanticdb/src/main/java/com/sourcegraph/scip_semanticdb/SymbolRewriter.java

Lines changed: 3 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,13 @@
11
package com.sourcegraph.scip_semanticdb;
22

33
/**
4-
* Rewrites placeholder SCIP global symbols emitted by the compiler plugin into their final form.
5-
*
6-
* <p>The compiler plugin does not know the Maven coordinates ({@code groupId:artifactId:version})
7-
* of the source it is compiling, so it emits global symbols with the sentinel prefix {@code ". . .
8-
* . "}. The aggregator resolves the appropriate coordinates via {@link PackageTable} and rewrites
9-
* those symbols to the final {@code "scip-java"} scheme.
10-
*
11-
* <p>{@code local N} symbols are passed through unchanged.
4+
* Rewrites placeholder SCIP global symbols ({@link #PLACEHOLDER_PREFIX}) emitted by the compiler
5+
* plug-ins into their final {@code "scip-java"} form by resolving Maven coordinates via {@link
6+
* PackageTable}. Local symbols are passed through unchanged.
127
*/
138
public final class SymbolRewriter {
149

15-
/**
16-
* Sentinel prefix used by the compiler plugin for global symbols whose coordinates are not yet
17-
* known.
18-
*/
1910
public static final String PLACEHOLDER_PREFIX = ". . . . ";
20-
21-
/** SCIP scheme used for symbols produced by scip-java. */
2211
public static final String SCIP_JAVA_SCHEME = "scip-java";
2312

2413
private final PackageTable packages;
@@ -27,25 +16,14 @@ public SymbolRewriter(PackageTable packages) {
2716
this.packages = packages;
2817
}
2918

30-
/**
31-
* Returns {@code true} if {@code symbol} is a placeholder global emitted by the compiler plugin.
32-
*/
3319
public static boolean isPlaceholderGlobal(String symbol) {
3420
return symbol != null && symbol.startsWith(PLACEHOLDER_PREFIX);
3521
}
3622

37-
/**
38-
* Returns {@code true} if {@code symbol} is a SCIP local symbol of the form {@code "local N"}.
39-
*/
4023
public static boolean isLocal(String symbol) {
4124
return symbol != null && symbol.startsWith("local ");
4225
}
4326

44-
/**
45-
* Rewrites a placeholder global symbol into its final form using the appropriate package
46-
* coordinates from {@link PackageTable}. Local symbols and already-rewritten symbols are returned
47-
* unchanged.
48-
*/
4927
public String rewrite(String symbol) {
5028
if (symbol == null || symbol.isEmpty()) return symbol;
5129
if (isLocal(symbol)) return symbol;

semanticdb-javac/src/main/java/com/sourcegraph/semanticdb_javac/ScipOccurrences.java

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,22 +9,20 @@
99
import java.util.Objects;
1010

1111
/**
12-
* Helpers for deduplicating SCIP {@link Occurrence} entries by their {@code (symbol, range, roles)}
13-
* key. Variants that differ only in whether {@code enclosing_range} is set are collapsed,
14-
* preferring the one that carries the enclosing range.
12+
* Deduplicates SCIP {@link Occurrence} entries by {@code (symbol, range, roles)}. Variants
13+
* differing only in whether {@code enclosing_range} is set are collapsed, preferring the one that
14+
* carries the enclosing range.
1515
*/
1616
final class ScipOccurrences {
1717

1818
private ScipOccurrences() {}
1919

20-
/** Returns a new list with duplicate occurrences collapsed in insertion order. */
2120
static List<Occurrence> deduplicate(List<Occurrence> occurrences) {
2221
LinkedHashMap<Key, Occurrence> out = new LinkedHashMap<>();
2322
for (Occurrence occ : occurrences) put(out, occ);
2423
return new ArrayList<>(out.values());
2524
}
2625

27-
/** Inserts {@code occ} into {@code out}, collapsing duplicates by {@link Key}. */
2826
static void put(Map<Key, Occurrence> out, Occurrence occ) {
2927
Key key = Key.of(occ);
3028
Occurrence existing = out.get(key);

semanticdb-javac/src/main/java/com/sourcegraph/semanticdb_javac/ScipRange.java

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,8 @@
44
import java.util.List;
55

66
/**
7-
* Lightweight value type for source ranges produced by {@link ScipVisitor}.
8-
*
9-
* <p>Coordinates are zero-based and follow the SCIP convention (line + character, end-exclusive
10-
* character). Convertible directly to SCIP's {@code repeated int32 range} representation via {@link
11-
* #asScipRange()}.
7+
* Zero-based source range produced by {@link ScipVisitor}, convertible to SCIP's compact {@code
8+
* repeated int32 range} via {@link #asScipRange()}.
129
*/
1310
final class ScipRange {
1411
final int startLine;
@@ -23,15 +20,12 @@ final class ScipRange {
2320
this.endCharacter = endCharacter;
2421
}
2522

26-
/** Returns a copy with adjusted start/end characters (used to correct for tab-expansion). */
23+
/** Returns a copy with adjusted start/end characters (used to correct tab-expansion). */
2724
ScipRange withCharacters(int startCharacter, int endCharacter) {
2825
return new ScipRange(startLine, startCharacter, endLine, endCharacter);
2926
}
3027

31-
/**
32-
* Encodes the range using SCIP's compact {@code repeated int32 range} layout: 3 ints when the
33-
* range fits on a single line, 4 ints otherwise.
34-
*/
28+
/** 3 ints when the range fits on one line, 4 ints otherwise. */
3529
List<Integer> asScipRange() {
3630
if (startLine == endLine) {
3731
return Arrays.asList(startLine, startCharacter, endCharacter);

semanticdb-javac/src/main/java/com/sourcegraph/semanticdb_javac/ScipRole.java

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,6 @@
11
package com.sourcegraph.semanticdb_javac;
22

3-
/**
4-
* Minimal role enum used by {@link ScipVisitor} when emitting SCIP occurrences. Mirrors the subset
5-
* of {@code Semanticdb.SymbolOccurrence.Role} that the direct-to-SCIP visitor cares about.
6-
*/
3+
/** Role of a SCIP occurrence as seen by {@link ScipVisitor}. */
74
enum ScipRole {
85
DEFINITION,
96
REFERENCE,

semanticdb-javac/src/main/java/com/sourcegraph/semanticdb_javac/ScipShardWriter.java

Lines changed: 8 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -15,21 +15,13 @@
1515
import java.util.List;
1616

1717
/**
18-
* Writes and merges per-source SCIP shards produced by the compiler plugin.
19-
*
20-
* <p>Each source file produces a self-contained {@link Index} shard containing a single {@link
21-
* Document}. When a shard already exists on disk (e.g. during annotation processing rounds), the
22-
* new document is merged into the existing one, deduplicating occurrences, symbols and
23-
* relationships.
18+
* Writes a per-source SCIP shard, merging it with any pre-existing shard at the same path (e.g. one
19+
* written by a prior annotation-processing round).
2420
*/
2521
public final class ScipShardWriter {
2622

2723
private ScipShardWriter() {}
2824

29-
/**
30-
* Writes the given {@code shard} to {@code output}, creating parent directories as needed. If
31-
* {@code output} already exists, the existing shard is parsed and merged with the new one.
32-
*/
3325
public static void writeOrMerge(Path output, Index shard) throws IOException {
3426
Files.createDirectories(output.getParent());
3527
if (Files.exists(output)) {
@@ -42,10 +34,7 @@ public static void writeOrMerge(Path output, Index shard) throws IOException {
4234
Files.write(output, shard.toByteArray());
4335
}
4436

45-
/**
46-
* Merges two SCIP shards by combining their document lists. Documents that share a {@code
47-
* relative_path} have their occurrences, symbols and external symbols deduplicated.
48-
*/
37+
/** Merges two shards: documents sharing a {@code relative_path} are dedup-merged. */
4938
static Index merge(Index a, Index b) {
5039
Index.Builder builder = Index.newBuilder();
5140
if (b.hasMetadata()) {
@@ -68,7 +57,7 @@ static Index merge(Index a, Index b) {
6857
}
6958
builder.addAllDocuments(byPath.values());
7059

71-
// External symbols: deduplicate by symbol string. Last writer wins to keep latest data.
60+
// External symbols: last writer wins to keep the most recent data.
7261
LinkedHashMap<String, SymbolInformation> externals = new LinkedHashMap<>();
7362
for (SymbolInformation s : a.getExternalSymbolsList()) externals.put(s.getSymbol(), s);
7463
for (SymbolInformation s : b.getExternalSymbolsList()) externals.put(s.getSymbol(), s);
@@ -78,18 +67,15 @@ static Index merge(Index a, Index b) {
7867
}
7968

8069
private static Document mergeDocuments(Document a, Document b) {
70+
// b.toBuilder() carries forward the most recent language/relative_path/text/encoding.
8171
Document.Builder builder = b.toBuilder().clearOccurrences().clearSymbols();
82-
// Use the most recent metadata for language/relative_path/text/encoding which already
83-
// come from b via toBuilder().
8472

85-
// Deduplicate occurrences by (range, symbol, roles). Variants that differ only in
86-
// enclosing_range get collapsed, preferring the one that carries the enclosing range.
8773
LinkedHashMap<ScipOccurrences.Key, Occurrence> occurrences = new LinkedHashMap<>();
8874
for (Occurrence occ : a.getOccurrencesList()) ScipOccurrences.put(occurrences, occ);
8975
for (Occurrence occ : b.getOccurrencesList()) ScipOccurrences.put(occurrences, occ);
9076
builder.addAllOccurrences(occurrences.values());
9177

92-
// Deduplicate symbols by symbol string; merge relationships and documentation.
78+
// Dedup symbols by symbol string; merge relationships and documentation.
9379
LinkedHashMap<String, SymbolInformation> bySymbol = new LinkedHashMap<>();
9480
for (SymbolInformation info : a.getSymbolsList()) bySymbol.put(info.getSymbol(), info);
9581
for (SymbolInformation info : b.getSymbolsList()) {
@@ -103,13 +89,13 @@ private static Document mergeDocuments(Document a, Document b) {
10389

10490
private static SymbolInformation mergeSymbol(SymbolInformation a, SymbolInformation b) {
10591
SymbolInformation.Builder builder = b.toBuilder();
106-
// Merge relationships, deduplicating by structural equality with deterministic ordering.
92+
// Dedup relationships by structural equality; preserve insertion order.
10793
LinkedHashMap<Relationship, Relationship> rels = new LinkedHashMap<>();
10894
for (Relationship r : a.getRelationshipsList()) rels.put(r, r);
10995
for (Relationship r : b.getRelationshipsList()) rels.put(r, r);
11096
builder.clearRelationships().addAllRelationships(rels.values());
11197

112-
// Merge documentation, preserving order and avoiding duplicates.
98+
// Dedup documentation; preserve insertion order.
11399
List<String> docs = new ArrayList<>(a.getDocumentationList());
114100
for (String d : b.getDocumentationList()) {
115101
if (!docs.contains(d)) docs.add(d);

0 commit comments

Comments
 (0)