Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,5 @@ UnityFileSystemTestData/UserSettings/
UnityFileSystemTestData/Packages/
*.db
*.csv

*.stackdump
2 changes: 2 additions & 0 deletions Analyzer/AnalyzerTool.cs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ public int Analyze(
string databaseName,
string searchPattern,
bool skipReferences,
bool skipCrc,
bool verbose,
bool noRecursion)
{
Expand All @@ -40,6 +41,7 @@ public int Analyze(
{
parser.Verbose = verbose;
parser.SkipReferences = skipReferences;
parser.SkipCrc = skipCrc;
parser.Init(writer.Connection);

}
Expand Down
311 changes: 228 additions & 83 deletions Analyzer/PPtrAndCrcProcessor.cs

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions Analyzer/SQLite/Handlers/ISQLiteHandler.cs
Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,5 @@ public interface ISQLiteFileParser : IDisposable
void Parse(string filename);
public bool Verbose { get; set; }
public bool SkipReferences { get; set; }
public bool SkipCrc { get; set; }
}
1 change: 1 addition & 0 deletions Analyzer/SQLite/Parsers/AddressablesBuildLayoutParser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ public class AddressablesBuildLayoutParser : ISQLiteFileParser

public bool Verbose { get; set; }
public bool SkipReferences { get; set; }
public bool SkipCrc { get; set; }

public void Dispose()
{
Expand Down
3 changes: 2 additions & 1 deletion Analyzer/SQLite/Parsers/SerializedFileParser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ public class SerializedFileParser : ISQLiteFileParser

public bool Verbose { get; set; }
public bool SkipReferences { get; set; }
public bool SkipCrc { get; set; }

public bool CanParse(string filename)
{
Expand All @@ -36,7 +37,7 @@ public void Dispose()

public void Init(SqliteConnection db)
{
m_Writer = new SerializedFileSQLiteWriter(db, SkipReferences);
m_Writer = new SerializedFileSQLiteWriter(db, SkipReferences, SkipCrc);
}

public void Parse(string filename)
Expand Down
31 changes: 22 additions & 9 deletions Analyzer/SQLite/Writers/SerializedFileSQLiteWriter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ public class SerializedFileSQLiteWriter : IDisposable
private int m_NextAssetBundleId = 0;

private bool m_SkipReferences;
private bool m_SkipCrc;

private IdProvider<string> m_SerializedFileIdProvider = new();
private ObjectIdProvider m_ObjectIdProvider = new();
Expand Down Expand Up @@ -54,11 +55,12 @@ public class SerializedFileSQLiteWriter : IDisposable
private SqliteConnection m_Database;
private SqliteCommand m_LastId = new SqliteCommand();
private SqliteTransaction m_CurrentTransaction = null;
public SerializedFileSQLiteWriter(SqliteConnection database, bool skipReferences)
public SerializedFileSQLiteWriter(SqliteConnection database, bool skipReferences, bool skipCrc)
{
m_Initialized = false;
m_Database = database;
m_SkipReferences = skipReferences;
m_SkipCrc = skipCrc;
}

public void Init()
Expand Down Expand Up @@ -116,7 +118,7 @@ public void WriteSerializedFile(string relativePath, string fullPath, string con
{
using var sf = UnityFileSystem.OpenSerializedFile(fullPath);
using var reader = new UnityFileReader(fullPath, 64 * 1024 * 1024);
using var pptrReader = new PPtrAndCrcProcessor(sf, reader, containingFolder, AddReference);
using var pptrReader = new PPtrAndCrcProcessor(sf, reader, containingFolder, m_SkipCrc, AddReference);
int serializedFileId = m_SerializedFileIdProvider.GetId(Path.GetFileName(fullPath).ToLower());
int sceneId = -1;

Expand Down Expand Up @@ -228,7 +230,10 @@ public void WriteSerializedFile(string relativePath, string fullPath, string con
m_AddObjectCommand.SetValue("game_object", "");
}

if (!m_SkipReferences)
// The walk both extracts references and accumulates the CRC, so it is needed
// unless both are disabled. When CRC is on but references are off, the walk
// still resolves referenced object ids (AddReference skips the insert).
if (!m_SkipReferences || !m_SkipCrc)
{
crc32 = pptrReader.Process(currentObjectId, offset, root);
}
Expand Down Expand Up @@ -264,15 +269,23 @@ public void WriteSerializedFile(string relativePath, string fullPath, string con
}
}

// Callback from PPtrAndCrcProcessor for each reference discovered in the SerializedFile
private int AddReference(long objectId, int fileId, long pathId, string propertyPath, string propertyType)
{
// Always resolve the id so the CRC stays stable; only persist the row when references
// are being extracted.
var referencedObjectId = m_ObjectIdProvider.GetId((m_LocalToDbFileId[fileId], pathId));
m_AddReferenceCommand.SetTransaction(m_CurrentTransaction);
m_AddReferenceCommand.SetValue("object", objectId);
m_AddReferenceCommand.SetValue("referenced_object", referencedObjectId);
m_AddReferenceCommand.SetValue("property_path", propertyPath);
m_AddReferenceCommand.SetValue("property_type", propertyType);
m_AddReferenceCommand.ExecuteNonQuery();

if (!m_SkipReferences)
{
m_AddReferenceCommand.SetTransaction(m_CurrentTransaction);
m_AddReferenceCommand.SetValue("object", objectId);
m_AddReferenceCommand.SetValue("referenced_object", referencedObjectId);
m_AddReferenceCommand.SetValue("property_path", propertyPath);
m_AddReferenceCommand.SetValue("property_type", propertyType);
m_AddReferenceCommand.ExecuteNonQuery();
}

return referencedObjectId;
}

Expand Down
2 changes: 1 addition & 1 deletion Documentation/analyzer.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ case, Unity will include the asset in all the AssetBundles with a reference to i
view_potential_duplicates provides the number of instances and the total size of the potentially
duplicated assets. It also lists all the AssetBundles where the asset was found.

If the skipReferences option is used, there will be a lot of false positives in that view. Otherwise,
If the `--skip-crc` option is used, there will be a lot of false positives in that view. Otherwise,
it should be very accurate because CRCs are used to determine if objects are identical.

## asset_view (AssetBundleProcessor)
Expand Down
31 changes: 18 additions & 13 deletions Documentation/command-analyze.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@ UnityDataTool analyze <path> [options]
| `<path>` | Path to folder containing files to analyze | *(required)* |
| `-o, --output-file <file>` | Output database filename | `database.db` |
| `-p, --search-pattern <pattern>` | File search pattern (`*` and `?` supported) | `*` |
| `-s, --skip-references` | Skip CRC and reference extraction (faster, smaller DB) | `false` |
| `-s, --skip-references` | Do not extract references (smaller DB, no `refs` table). CRC is still computed. | `false` |
| `--skip-crc` | Skip the CRC32 checksum calculation (faster; `objects.crc32` will be 0) | `false` |
| `-v, --verbose` | Show more information during analysis | `false` |
| `--no-recurse` | Do not recurse into sub-directories | `false` |
| `-d, --typetree-data <file>` | Load an external TypeTree data file before processing (Unity 6.5+) | — |
Expand All @@ -30,9 +31,9 @@ Analyze only `.bundle` files and specify a custom database name:
UnityDataTool analyze /path/to/asset/bundles -o my_database.db -p "*.bundle"
```

Fast analysis (skip reference tracking):
Fastest analysis (skip both reference extraction and CRC):
```bash
UnityDataTool analyze /path/to/bundles -s
UnityDataTool analyze /path/to/bundles --skip-references --skip-crc
```

See also [Analyze Examples](../../Documentation/analyze-examples.md).
Expand Down Expand Up @@ -121,23 +122,27 @@ See [Comparing Builds](../../Documentation/comparing-builds.md) for strategies t

### Slow Analyze times, large output database

Consider using the `--skip-references` argument.
Two independent flags reduce analysis time and database size:

A real life analyze of a big Addressables build shows how large a difference this can make:
* `--skip-crc` skips the CRC32 calculation. This is usually the largest time saver, because computing a CRC requires reading the full content of every object, including large texture, mesh and audio data in companion `.resS`/`.resource` files.
* `--skip-references` skips reference extraction, which is the largest contributor to database size (the `refs` table). The references are not needed for core asset inventory and size information.

* 208 seconds and producted a 500MB database (not specifying --skip-reference)
* 9 seconds and produced a 68 MB file (with --skip-reference)
For the fastest, smallest result, combine them.

The references are not needed for core asset inventory and size information.
A real-life analysis of a large Addressables build shows how much difference skipping both references and CRC can make:

Note: When specifying `--skip-reference` some functionality is lost:
* 208 seconds, producing a 500 MB database (default)
* 9 seconds, producing a 68 MB database (with `--skip-references --skip-crc`)

When `--skip-references` is used, some functionality is lost:

* the `find-refs` command will not work
* `view_material_shader_refs` and `view_material_texture_refs` will be empty
* `script_object_view` will be empty
* Queries that look at the relationship between objects will not work. For example the refs table is required to link between a `MonoBehaviour` and its `MonoScript`.
* The `objects.crc32` column will be NULL/0 for all objects. This means:
* No detection of identical objects by content hash (See [Comparing Builds](../../Documentation/comparing-builds.md))
* The `view_potential_duplicates` view relies partially on CRC32 to distinguish true duplicates

Future work: The refs table contains a lot of repeated strings and could be made smaller and more efficient. It might also be prudent to control the CRC32 calculation using an independent flag.
When `--skip-crc` is used, the `objects.crc32` column will be 0 for all objects. This means:

* No detection of identical objects by content hash (See [Comparing Builds](../../Documentation/comparing-builds.md))
* The `view_potential_duplicates` view relies partially on CRC32 to distinguish true duplicates, so it will report many false positives

36 changes: 27 additions & 9 deletions UnityDataTool/Program.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
using System;
using System.CommandLine;
using System.CommandLine.Invocation;
using System.IO;
using System.Threading.Tasks;
using UnityDataTools.Analyzer;
Expand Down Expand Up @@ -41,7 +42,8 @@ static Command BuildAnalyzeCommand()
{
var pathArg = new Argument<DirectoryInfo>("path", "The path to the directory containing the files to analyze").ExistingOnly();
var oOpt = new Option<string>(aliases: new[] { "--output-file", "-o" }, description: "Filename of the output database", getDefaultValue: () => "database.db");
var sOpt = new Option<bool>(aliases: new[] { "--skip-references", "-s" }, description: "Skip CRC and do not extract references");
var sOpt = new Option<bool>(aliases: new[] { "--skip-references", "-s" }, description: "Do not extract references (CRC is still computed unless --skip-crc is also given)");
var scOpt = new Option<bool>(aliases: new[] { "--skip-crc" }, description: "Skip CRC checksum calculation");
var rOpt = new Option<bool>(aliases: new[] { "--extract-references", "-r" }) { IsHidden = true };
var pOpt = new Option<string>(aliases: new[] { "--search-pattern", "-p" }, description: "File search pattern", getDefaultValue: () => "*");
var vOpt = new Option<bool>(aliases: new[] { "--verbose", "-v" }, description: "Verbose output");
Expand All @@ -53,6 +55,7 @@ static Command BuildAnalyzeCommand()
pathArg,
oOpt,
sOpt,
scOpt,
rOpt,
pOpt,
vOpt,
Expand All @@ -61,14 +64,28 @@ static Command BuildAnalyzeCommand()
};

analyzeCommand.AddAlias("analyse");
analyzeCommand.SetHandler(
(DirectoryInfo di, string o, bool s, bool r, string p, bool v, bool noRecurse, FileInfo d) =>
// Bound via InvocationContext because the option count exceeds the strongly-typed
// SetHandler overloads.
analyzeCommand.SetHandler((InvocationContext context) =>
{
var d = context.ParseResult.GetValueForOption(dOpt);
var ttResult = LoadTypeTreeDataFile(d);
if (ttResult != 0)
{
var ttResult = LoadTypeTreeDataFile(d);
if (ttResult != 0) return Task.FromResult(ttResult);
return Task.FromResult(HandleAnalyze(di, o, s, r, p, v, noRecurse));
},
pathArg, oOpt, sOpt, rOpt, pOpt, vOpt, recurseOpt, dOpt);
context.ExitCode = ttResult;
return;
}

context.ExitCode = HandleAnalyze(
context.ParseResult.GetValueForArgument(pathArg),
context.ParseResult.GetValueForOption(oOpt),
context.ParseResult.GetValueForOption(sOpt),
context.ParseResult.GetValueForOption(scOpt),
context.ParseResult.GetValueForOption(rOpt),
context.ParseResult.GetValueForOption(pOpt),
context.ParseResult.GetValueForOption(vOpt),
context.ParseResult.GetValueForOption(recurseOpt));
});

return analyzeCommand;
}
Expand Down Expand Up @@ -293,6 +310,7 @@ static int HandleAnalyze(
DirectoryInfo path,
string outputFile,
bool skipReferences,
bool skipCrc,
bool extractReferences,
string searchPattern,
bool verbose,
Expand All @@ -305,7 +323,7 @@ static int HandleAnalyze(
Console.WriteLine("WARNING: --extract-references, -r option is deprecated (references are now extracted by default)");
}

return analyzer.Analyze(path.FullName, outputFile, searchPattern, skipReferences, verbose, noRecurse);
return analyzer.Analyze(path.FullName, outputFile, searchPattern, skipReferences, skipCrc, verbose, noRecurse);
}

static int HandleFindReferences(FileInfo databasePath, string outputFile, long? objectId, string objectName, string objectType, bool findAll)
Expand Down
19 changes: 19 additions & 0 deletions UnityFileSystem.Tests/UnityFileSystemTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,25 @@ public void ReadFile_InvalidHandle_ThrowsException()
Assert.Throws<ObjectDisposedException>(() => file.Read(10, new byte[10]));
}

// Ranges that cross the internal buffer boundary (and a partial final chunk) must
// produce the same CRC as a single-buffer read. TextFile.txt is 21 bytes; an 8-byte
// buffer forces three chunks (8 + 8 + 5).
[TestCase(0, 21)] // whole file, partial final chunk
[TestCase(0, 16)] // exact multiple of the buffer size
[TestCase(3, 15)] // unaligned start, crosses two boundaries
[TestCase(0, 8)] // exactly one buffer
[TestCase(2, 5)] // entirely within one buffer
public void ComputeCRC_RangeCrossingBuffer_MatchesSingleBufferRead(long offset, int size)
{
    // Buffer capacities handed to the two readers under comparison.
    const int LargeBufferSize = 1024 * 1024;
    const int SmallBufferSize = 8;

    var textFilePath = Path.Combine(Context.TestDataFolder, "TextFile.txt");

    // Reference CRC: computed through the reader with the large buffer.
    using var wholeFileReader = new UnityFileReader(textFilePath, LargeBufferSize);
    var referenceCrc = wholeFileReader.ComputeCRC(offset, size);

    // Same range, read through the reader with the small buffer.
    using var chunkedReader = new UnityFileReader(textFilePath, SmallBufferSize);
    var chunkedCrc = chunkedReader.ComputeCRC(offset, size);

    // The CRC of a byte range must not depend on the reader's buffer size.
    Assert.AreEqual(referenceCrc, chunkedCrc);
}

[Test]
public void OpenFile_ArchiveFileSystem_ReturnsFile()
{
Expand Down
14 changes: 8 additions & 6 deletions UnityFileSystem/UnityFileReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -117,16 +117,18 @@ public byte ReadUInt8(long fileOffset)
return m_Buffer[offset];
}

// Computes the CRC32 over a contiguous range, reading the file in buffer-sized chunks.
public uint ComputeCRC(long fileOffset, int size, uint crc32 = 0)
{
var readSize = size > m_Buffer.Length ? m_Buffer.Length : size;
var readBytes = 0;
var remaining = size;

while (readBytes < size)
while (remaining > 0)
{
var offset = GetBufferOffset(fileOffset, readSize);
crc32 = Crc32Algorithm.Append(crc32, m_Buffer, offset, readSize);
readBytes += readSize;
var chunk = (int)Math.Min(m_Buffer.Length, remaining);
var offset = GetBufferOffset(fileOffset, chunk);
crc32 = Crc32Algorithm.Append(crc32, m_Buffer, offset, chunk);
fileOffset += chunk;
remaining -= chunk;
}

return crc32;
Expand Down
Loading