Skip to content

Commit 967de58

Browse files
Merge pull request #3546 from petercrabtree/dev/dev-env-clean-no-bom
Normalize UTF-8 BOM Marks and ps1 Indention
2 parents 48ff851 + b4fb59a commit 967de58

File tree

1,357 files changed

+1853
-1467
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,357 files changed

+1853
-1467
lines changed

.editorconfig

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,18 @@
22
root = true
33

44
[*]
5+
charset = utf-8 # standardize on no BOM (except resx, see below)
56
indent_style = tab
67
indent_size = 4
78
guidelines = 110
89
tab_width = 4
910
end_of_line = crlf
1011

12+
# .net tooling writes the BOM to resx files on running dotnet build ILSpy.sln regardless,
13+
# so we should direct text editors to NOT change the file
14+
[*.resx]
15+
charset = utf-8-bom
16+
1117
[*.il]
1218
indent_style = space
1319
indent_size = 2

BuildTools/bom-classify-encodings.ps1

Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
<#
2+
.SYNOPSIS
3+
Classify text files by encoding under the current subtree, respecting .gitignore.
4+
5+
.DESCRIPTION
6+
Enumerates tracked files and untracked-but-not-ignored files (via Git) beneath
7+
PWD. Skips likely-binary files (NUL probe). Classifies remaining files as:
8+
- 'utf8' : valid UTF-8 (no BOM) or empty file
9+
- 'utf8-with-bom' : starts with UTF-8 BOM (EF BB BF)
10+
- 'other' : text but not valid UTF-8 (e.g., ANSI/legacy code pages). UTF-16 text usually contains NUL bytes and is therefore skipped as likely-binary.
11+
12+
Outputs:
13+
1) Relative paths of files classified as 'other'
14+
2) A table by extension: UTF8 / UTF8-with-BOM / Other / Total
15+
16+
Notes:
17+
- Read-only: this script makes no changes.
18+
- Requires Git and must be run inside a Git work tree.
19+
#>
20+
21+
[CmdletBinding()]
22+
param()
23+
24+
Set-StrictMode -Version Latest
25+
$ErrorActionPreference = 'Stop'
26+
27+
# --- Git enumeration ---------------------------------------------------------
28+
function Assert-InGitWorkTree {
	<#
	.SYNOPSIS
	Throws 'Not in a Git work tree.' unless git reports that the current
	directory is inside a work tree.
	#>
	# Outside a repository git prints nothing on stdout, so the captured value
	# is $null. Stringify before trimming: calling .Trim() on $null would raise
	# a null-method error and the intended message below would never be shown.
	$output = & git rev-parse --is-inside-work-tree 2>$null
	$inside = "$output".Trim()

	if ($LASTEXITCODE -ne 0 -or $inside -ne 'true') {
		throw 'Not in a Git work tree.'
	}
}
35+
36+
function Get-GitFilesUnderPwd {
	<#
	.SYNOPSIS
	Returns full paths to tracked + untracked-not-ignored files under PWD.

	.OUTPUTS
	One string per existing file, built by joining the repository root with a
	path reported by `git ls-files`.
	#>
	Assert-InGitWorkTree

	$repoRoot = (& git rev-parse --show-toplevel).Trim()

	# End the prefix with a directory separator so that a sibling directory such
	# as 'src-old' is not treated as being inside 'src'.
	$separators = [System.IO.Path]::DirectorySeparatorChar,
		[System.IO.Path]::AltDirectorySeparatorChar
	$pwdPrefix = (Get-Location).Path.TrimEnd($separators) +
		[System.IO.Path]::DirectorySeparatorChar

	# cached (tracked) + others (untracked not ignored)
	$nulSeparated = & git -C $repoRoot ls-files -z --cached --others --exclude-standard

	# PowerShell splits native output on newlines and yields $null when there is
	# no output; re-join so the split below always operates on a single string.
	$relativePaths = (@($nulSeparated) -join "`n").Split(
		[char]0, [System.StringSplitOptions]::RemoveEmptyEntries)

	foreach ($relPath in $relativePaths) {
		$fullPath = Join-Path $repoRoot $relPath

		# Only include files under the current subtree.
		if ($fullPath.StartsWith($pwdPrefix,
				[System.StringComparison]::OrdinalIgnoreCase)) {
			if (Test-Path -LiteralPath $fullPath -PathType Leaf) { $fullPath }
		}
	}
}
61+
62+
# --- Probes ------------------------------------------------------------------
63+
function Test-ProbablyBinary {
	<#
	.SYNOPSIS
	Heuristic binary probe: $true when a NUL byte occurs in the first 8 KiB.

	.PARAMETER Path
	File to inspect.

	.OUTPUTS
	[bool]. Empty files, and files that cannot be opened or read, yield $false.
	#>
	param([Parameter(Mandatory)][string]$Path)

	$probeSize = 8192

	try {
		$file = [System.IO.File]::Open(
			$Path,
			[System.IO.FileMode]::Open,
			[System.IO.FileAccess]::Read,
			[System.IO.FileShare]::ReadWrite)
		try {
			$count = [int][Math]::Min($probeSize, $file.Length)
			if ($count -le 0) { return $false }

			$head = [byte[]]::new($count)
			[void]$file.Read($head, 0, $count)

			return ($head -contains 0)
		}
		finally {
			$file.Dispose()
		}
	}
	catch {
		# Best effort: an unreadable file is reported as "not binary".
		return $false
	}
}
81+
82+
function Get-TextEncodingCategory {
	<#
	.SYNOPSIS
	Classifies a file as 'utf8', 'utf8-with-bom' or 'other'; returns $null for
	a likely-binary file.

	.DESCRIPTION
	Checks, in order: empty file ('utf8'), leading EF BB BF ('utf8-with-bom'),
	a NUL byte in the first 8 KiB ($null). Otherwise the whole file is decoded
	with a throwing UTF-8 decoder: success is 'utf8', any failure is 'other'.

	.PARAMETER Path
	File to classify.
	#>
	param([Parameter(Mandatory)][string]$Path)

	$file = [System.IO.File]::Open($Path, 'Open', 'Read', 'ReadWrite')
	try {
		$size = $file.Length
		if ($size -eq 0) { return 'utf8' }

		# One read serves both the BOM check and the NUL probe.
		$head = [byte[]]::new([int][Math]::Min(8192, $size))
		[void]$file.Read($head, 0, $head.Length)

		$startsWithBom = ($head.Length -ge 3) -and
			($head[0] -eq 0xEF) -and ($head[1] -eq 0xBB) -and ($head[2] -eq 0xBF)
		if ($startsWithBom) { return 'utf8-with-bom' }

		# Quick binary probe before expensive decoding
		if ($head -contains 0) { return $null }
	}
	finally {
		$file.Dispose()
	}

	# Validate UTF-8 by decoding with throw-on-invalid option (no BOM).
	try {
		$strictUtf8 = [System.Text.UTF8Encoding]::new($false, $true)
		[void]$strictUtf8.GetString([System.IO.File]::ReadAllBytes($Path))
		return 'utf8'
	}
	catch {
		return 'other'
	}
}
117+
118+
# --- Main --------------------------------------------------------------------
119+
$otherFiles = @()
$byExtension = @{}

$allFiles = Get-GitFilesUnderPwd

foreach ($fullPath in $allFiles) {
	# Likely-binary files are neither decoded nor counted.
	if (Test-ProbablyBinary $fullPath) { continue }

	$category = Get-TextEncodingCategory $fullPath
	if (-not $category) { continue }

	# One counter table per (lower-cased) extension, created on first use.
	$ext = [IO.Path]::GetExtension($fullPath).ToLower()
	$counts = $byExtension[$ext]
	if ($null -eq $counts) {
		$counts = @{ 'utf8' = 0; 'utf8-with-bom' = 0; 'other' = 0 }
		$byExtension[$ext] = $counts
	}
	$counts[$category] = $counts[$category] + 1

	if ($category -eq 'other') {
		$otherFiles += (Resolve-Path -LiteralPath $fullPath -Relative)
	}
}

# 1) Files in 'other'
if ($otherFiles.Count -gt 0) {
	'Files classified as ''other'':'
	$otherFiles | Sort-Object | ForEach-Object { " $_" }
	''
}

# 2) Table by extension
$rows = $byExtension.GetEnumerator() | ForEach-Object {
	$label = if ($_.Key) { $_.Key } else { '[noext]' }
	$plain = [int]$_.Value['utf8']
	$withBom = [int]$_.Value['utf8-with-bom']
	$nonUtf8 = [int]$_.Value['other']

	[PSCustomObject]@{
		Extension       = $label
		UTF8            = $plain
		'UTF8-with-BOM' = $withBom
		Other           = $nonUtf8
		Total           = $plain + $withBom + $nonUtf8
	}
}

# Largest groups first; ties broken alphabetically by extension.
$rows |
	Sort-Object -Property (
		@{Expression='Total';Descending=$true},
		@{Expression='Extension';Descending=$false}
	) |
	Format-Table -AutoSize

BuildTools/bom-strip.ps1

Lines changed: 208 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,208 @@
1+
<#
2+
.SYNOPSIS
3+
Strip UTF-8 BOM from selected text files under the current subtree, respecting
4+
.gitignore.
5+
6+
.DESCRIPTION
7+
Enumerates tracked and untracked-but-not-ignored files under the current
8+
directory (via Git), filters to texty extensions and dotfiles, skips likely
9+
binary files (NUL probe), and removes a leading UTF-8 BOM (EF BB BF) in place.
10+
11+
Refuses to run if there are uncommitted changes as a safeguard. Use -Force to override.
12+
Supports -WhatIf/-Confirm via ShouldProcess.
13+
#>
14+
15+
[CmdletBinding(SupportsShouldProcess = $true, ConfirmImpact = 'Low')]
16+
param(
17+
[switch]$Force
18+
)
19+
20+
Set-StrictMode -Version Latest
21+
$ErrorActionPreference = 'Stop'
22+
23+
# --- File sets (ILSpy) ------------------------------------------------------
24+
# Dotfiles that are selected by exact file name.
$Dotfiles = @(
	'.gitignore'
	'.editorconfig'
	'.gitattributes'
	'.gitmodules'
	'.tgitconfig'
	'.vsconfig'
)

# Extensions that are selected, grouped by first letter.
$AllowedExts = @(
	'.bat'
	'.config', '.cs', '.csproj', '.css'
	'.filelist', '.fs'
	'.html'
	'.il', '.ipynb'
	'.js', '.json'
	'.less'
	'.manifest', '.md'
	'.projitems', '.props', '.ps1', '.psd1'
	'.ruleset'
	'.shproj', '.sln', '.slnf', '.svg'
	'.template', '.tt', '.txt'
	'.vb', '.vsct', '.vsixlangpack'
	'.wxl'
	'.xaml', '.xml', '.xshd'
	'.yml'
)

# Also select files whose name contains no dot at all (e.g. LICENSE).
$IncludeNoExt = $true
37+
38+
# --- Git checks / enumeration -----------------------------------------------
39+
function Assert-InGitWorkTree {
	<#
	.SYNOPSIS
	Throws 'Not in a Git work tree.' unless git reports that the current
	directory is inside a work tree.
	#>
	# Outside a repository git prints nothing on stdout, so the captured value
	# is $null. Stringify before trimming: calling .Trim() on $null would raise
	# a null-method error and the intended message below would never be shown.
	$output = & git rev-parse --is-inside-work-tree 2>$null
	$inside = "$output".Trim()

	if ($LASTEXITCODE -ne 0 -or $inside -ne 'true') {
		throw 'Not in a Git work tree.'
	}
}
45+
46+
function Assert-CleanWorkingTree {
	<#
	.SYNOPSIS
	Throws when `git status --porcelain` reports any entry, unless the
	script-level -Force switch is set.
	#>
	if (-not $Force) {
		$porcelain = & git status --porcelain -z
		if ($LASTEXITCODE -ne 0) {
			throw 'git status failed.'
		}

		$isClean = [string]::IsNullOrEmpty($porcelain)
		if (-not $isClean) {
			throw 'Working tree not clean. Commit/stash changes or use -Force.'
		}
	}
}
56+
57+
function Get-GitFilesUnderPwd {
	<#
	.SYNOPSIS
	Returns full paths to tracked + untracked-not-ignored files under PWD.

	.OUTPUTS
	One string per existing file, built by joining the repository root with a
	path reported by `git ls-files`.
	#>
	Assert-InGitWorkTree

	$repoRoot = (& git rev-parse --show-toplevel).Trim()

	# End the prefix with a directory separator so that a sibling directory such
	# as 'src-old' is not treated as being inside 'src'.
	$separators = [System.IO.Path]::DirectorySeparatorChar,
		[System.IO.Path]::AltDirectorySeparatorChar
	$pwdPrefix = (Get-Location).Path.TrimEnd($separators) +
		[System.IO.Path]::DirectorySeparatorChar

	# Both listings are NUL-terminated (-z), so they can simply be concatenated.
	$tracked = & git -C $repoRoot ls-files -z
	$others = & git -C $repoRoot ls-files --others --exclude-standard -z

	$allRel = ("$tracked$others").Split(
		[char]0, [System.StringSplitOptions]::RemoveEmptyEntries)

	foreach ($relPath in $allRel) {
		$fullPath = Join-Path $repoRoot $relPath
		if ($fullPath.StartsWith($pwdPrefix,
				[System.StringComparison]::OrdinalIgnoreCase)) {
			if (Test-Path -LiteralPath $fullPath -PathType Leaf) {
				$fullPath
			}
		}
	}
}
79+
80+
# --- Probes -----------------------------------------------------------------
81+
function Test-HasUtf8Bom {
	<#
	.SYNOPSIS
	Returns $true when the file begins with the UTF-8 BOM bytes EF BB BF.

	.PARAMETER Path
	File to inspect.

	.OUTPUTS
	[bool]. Files shorter than three bytes, and files that cannot be opened or
	read, yield $false.
	#>
	param([Parameter(Mandatory)][string]$Path)

	try {
		$file = [System.IO.File]::Open(
			$Path,
			[System.IO.FileMode]::Open,
			[System.IO.FileAccess]::Read,
			[System.IO.FileShare]::ReadWrite)
		try {
			if ($file.Length -ge 3) {
				$lead = [byte[]]::new(3)
				[void]$file.Read($lead, 0, 3)

				$isBom = ($lead[0] -eq 0xEF) -and
					($lead[1] -eq 0xBB) -and
					($lead[2] -eq 0xBF)
				return $isBom
			}

			return $false
		}
		finally {
			$file.Dispose()
		}
	}
	catch {
		# Best effort: an unreadable file is reported as having no BOM.
		return $false
	}
}
102+
103+
function Test-ProbablyBinary {
	<#
	.SYNOPSIS
	Binary if the first 8 KiB contains any NUL byte.

	.PARAMETER Path
	File to inspect.

	.OUTPUTS
	[bool]. Empty files, and files that cannot be opened or read, yield $false.
	#>
	param([Parameter(Mandatory)][string]$Path)

	$probeSize = 8192

	try {
		$file = [System.IO.File]::Open(
			$Path,
			[System.IO.FileMode]::Open,
			[System.IO.FileAccess]::Read,
			[System.IO.FileShare]::ReadWrite)
		try {
			$count = [int][Math]::Min($probeSize, $file.Length)
			if ($count -le 0) { return $false }

			$head = [byte[]]::new($count)
			[void]$file.Read($head, 0, $count)

			return ($head -contains 0)
		}
		finally {
			$file.Dispose()
		}
	}
	catch {
		# Best effort: an unreadable file is reported as "not binary".
		return $false
	}
}
124+
125+
# --- Mutation ---------------------------------------------------------------
126+
function Remove-Utf8BomInPlace {
	<#
	.SYNOPSIS
	Rewrites a file without its leading UTF-8 BOM (EF BB BF).

	.DESCRIPTION
	Loads the whole file; when it starts with the BOM, the file is re-created
	and the already-loaded bytes are written from offset 3 onward, so no second
	full-size buffer is allocated.

	.PARAMETER Path
	File to rewrite.

	.OUTPUTS
	[bool]. $true when the file was rewritten; $false when it is shorter than
	three bytes or does not start with the BOM (the file is then left untouched).
	#>
	param([Parameter(Mandatory)][string]$Path)

	$content = [System.IO.File]::ReadAllBytes($Path)

	$hasBom = ($content.Length -ge 3) -and
		($content[0] -eq 0xEF) -and
		($content[1] -eq 0xBB) -and
		($content[2] -eq 0xBF)
	if (-not $hasBom) { return $false }

	$payloadLength = $content.Length - 3

	$out = [System.IO.File]::Open(
		$Path,
		[System.IO.FileMode]::Create,
		[System.IO.FileAccess]::Write,
		[System.IO.FileShare]::ReadWrite)
	try {
		$out.Write($content, 3, $payloadLength)
		$out.SetLength($payloadLength)
	}
	finally {
		$out.Dispose()
	}

	return $true
}
150+
151+
# --- Main -------------------------------------------------------------------
152+
Assert-InGitWorkTree
Assert-CleanWorkingTree

$allFiles = Get-GitFilesUnderPwd

# Candidate selection: name/extension filter first, then the BOM probe, then the
# binary probe. Every pipe operator ends its line: a line that *begins* with '|'
# is only accepted as a continuation by PowerShell 7+, and is a parse error in
# Windows PowerShell 5.1.
$targets = $allFiles |
	ForEach-Object {
		$fileName = [IO.Path]::GetFileName($_)
		$ext = [IO.Path]::GetExtension($fileName)

		$isDot = $Dotfiles -contains $fileName
		$isNoExt = -not $fileName.Contains('.')

		if ($isDot -or ($AllowedExts -contains $ext) -or
			($IncludeNoExt -and $isNoExt -and -not $isDot)) {
			$_
		}
	} |
	Where-Object { Test-HasUtf8Bom $_ } |
	Where-Object { -not (Test-ProbablyBinary $_) }
171+
172+
$changed = 0
$byExtension = @{}
$dotfileChanges = 0

foreach ($path in $targets) {
	$relative = Resolve-Path -LiteralPath $path -Relative

	# Honors -WhatIf / -Confirm: nothing is touched unless ShouldProcess agrees.
	if (-not $PSCmdlet.ShouldProcess($relative,'Strip UTF-8 BOM')) { continue }

	# $false means the file turned out to have no BOM; it is not counted.
	if (-not (Remove-Utf8BomInPlace -Path $path)) { continue }

	$changed++

	$fileName = [IO.Path]::GetFileName($path)
	if ($Dotfiles -contains $fileName) { $dotfileChanges++ }

	$ext = [IO.Path]::GetExtension($fileName)
	if ($byExtension.ContainsKey($ext)) {
		$byExtension[$ext]++
	}
	else {
		$byExtension[$ext] = 1
	}

	"stripped BOM: $relative"
}

"Done. Stripped BOM from $changed file(s)."

# Per-extension summary, sorted by extension name.
if ($byExtension.Keys.Count -gt 0) {
	""
	"By extension:"
	foreach ($entry in ($byExtension.GetEnumerator() | Sort-Object Name)) {
		$key = $entry.Name
		if ([string]::IsNullOrEmpty($key)) { $key = '[noext]' }
		" {0}: {1}" -f $key, $entry.Value
	}
}

if ($dotfileChanges -gt 0) {
	" [dotfiles]: $dotfileChanges"
}

0 commit comments

Comments
 (0)