From 9014b0d523b9e982e6fb25abb98395bcda28373e Mon Sep 17 00:00:00 2001 From: publicarray Date: Sat, 17 Jan 2026 13:40:06 +1100 Subject: [PATCH 1/4] First Attempt to add PowerShell Docs --- docs/file-scrapers.md | 53 +++++++++++++++++++++++ lib/docs/filters/powershell/clean_html.rb | 24 ++++++++++ lib/docs/filters/powershell/entries.rb | 26 +++++++++++ lib/docs/scrapers/powershell.rb | 32 ++++++++++++++ 4 files changed, 135 insertions(+) create mode 100644 lib/docs/filters/powershell/clean_html.rb create mode 100644 lib/docs/filters/powershell/entries.rb create mode 100644 lib/docs/scrapers/powershell.rb diff --git a/docs/file-scrapers.md b/docs/file-scrapers.md index 076510fcd9..495fa6061c 100644 --- a/docs/file-scrapers.md +++ b/docs/file-scrapers.md @@ -321,3 +321,56 @@ mv three.js-r${VERSION}/docs/* docs/threejs~${VERSION}/ rm -rf three.js-r${VERSION}/ rm threejs.tar.gz ``` + +## PowerShell + +```sh +curl -o PowerShell-Docs-main.zip 'https://github.com/MicrosoftDocs/PowerShell-Docs/archive/refs/heads/main.zip' +unzip PowerShell-Docs-main.zip +cd PowerShell-Docs-main + +## Has missing documentation, claims it can't find files from toc.yml (get-help.md etc.) +# pacman -S dotnet-host aspnet-runtime +# yay -S dotnet-runtime-8.0-bin # for DocFxTocGenerator +# dotnet --list-runtimes +# dotnet tool update -g docfx +# dotnet tool install DocFxTocGenerator -g +# cd PowerShell-Docs-main/reference +# DocFxTocGenerator -d reference -sr --indexing NoDefault +# docfx reference/docfx.json -o ../docs/powershell + + +# strip all front matter in all Markdown files +find reference -name "*.md" -type f -exec sed -i '/^---$/,/^---$/d' {} + +## create a simpified template +# cat > reference/template.html << 'EOF' +# +# +# +# +# +# + +# +# +#
+#
+# +#
+#
+# +# +# EOF + +npx markdown-folder-to-html reference +cp -r _reference ../docs/powershell +cd .. + +# process whent on indefinatly +bundle exec thor docs:generate powershell --debug + +rm -rdf PowerShell-Docs-main/ +rm PowerShell-Docs-main.zip +``` diff --git a/lib/docs/filters/powershell/clean_html.rb b/lib/docs/filters/powershell/clean_html.rb new file mode 100644 index 0000000000..07a43051ce --- /dev/null +++ b/lib/docs/filters/powershell/clean_html.rb @@ -0,0 +1,24 @@ +module Docs + class Powershell + class CleanHtmlFilter < Filter + def call + # web + css('header').remove + css('#ms--content-header').remove + css('#article-header').remove + css('.left-container').remove + css('.layout-body-aside').remove + css('#site-user-feedback-footer').remove + css('footer').remove + # docfx + css('.sideaffix').remove + # markdown-folder-to-html + css('#menuLink').remove + css('#menu').remove + css('script').remove + css('style').remove + doc + end + end + end +end diff --git a/lib/docs/filters/powershell/entries.rb b/lib/docs/filters/powershell/entries.rb new file mode 100644 index 0000000000..d239178dc1 --- /dev/null +++ b/lib/docs/filters/powershell/entries.rb @@ -0,0 +1,26 @@ +module Docs + class Powershell + class EntriesFilter < Docs::EntriesFilter + def get_name + at_css('h1')&.content || "PowerShell" + end + + def get_type + case slug + when /^docs-conceptual/ + 'Scripting' + when /^5\.1/ + '5.1' + when /^7\.4/ + '7.4' + when /^7\.5/ + '7.5' + when /^7\.6/ + '7.6' + else + 'Module' + end + end + end + end +end diff --git a/lib/docs/scrapers/powershell.rb b/lib/docs/scrapers/powershell.rb new file mode 100644 index 0000000000..6a68b1b795 --- /dev/null +++ b/lib/docs/scrapers/powershell.rb @@ -0,0 +1,32 @@ +module Docs + class Powershell < FileScraper + # class Powershell < UrlScraper + self.name = 'PowerShell' + self.type = 'simple' + self.release = '7.5' + self.base_url = 'https://learn.microsoft.com/en-us/powershell' + # self.root_path = 'scripting/overview.html' 
+ self.root_path = 'docs-conceptual/overview.html' + self.initial_paths = [ + # 'scripting/toc.html', + 'module/index.html', + # 'module/Microsoft.WSMan.Management/About/about_WS-Management_Cmdlets.html', + # 'module/PSWorkflow/About/about_ActivityCommonParameters.html', + # 'module/Microsoft.PowerShell.Core/About/About.html', + # 'module/PSReadLine/About/about_PSReadLine.html', + # 'module/Microsoft.PowerShell.Security/About/about_Certificate_Provider.html', + # 'module/PSScheduledJob/About/about_Scheduled_Jobs.html' + ] + self.links = { + home: 'https://learn.microsoft.com/powershell', + code: 'https://github.com/MicrosoftDocs/PowerShell-Docs' + } + html_filters.push 'powershell/clean_html', 'powershell/entries' + + # options[:rate_limit] = 100 # micososft docs online are ratelimited + options[:skip_patterns] = [/\/\//] # otherwise infinately adding the same pages + options[:attribution] = <<-HTML + The MIT License (MIT) Copyright (c) Microsoft Corporation + HTML + end +end From 5509031a2d8708efb112c4427fb4d1e2612c411a Mon Sep 17 00:00:00 2001 From: publicarray Date: Sat, 17 Jan 2026 13:49:09 +1100 Subject: [PATCH 2/4] Add missing icon files --- public/icons/docs/powershell/16.png | Bin 0 -> 1585 bytes public/icons/docs/powershell/16@2x.png | Bin 0 -> 3061 bytes public/icons/docs/powershell/SOURCE | 1 + 3 files changed, 1 insertion(+) create mode 100644 public/icons/docs/powershell/16.png create mode 100644 public/icons/docs/powershell/16@2x.png create mode 100644 public/icons/docs/powershell/SOURCE diff --git a/public/icons/docs/powershell/16.png b/public/icons/docs/powershell/16.png new file mode 100644 index 0000000000000000000000000000000000000000..730b58a30269746cfa5019ce6a4eb5a8344c08ae GIT binary patch literal 1585 zcmb7Ddo+}37=ORHn8_vc%?yTwkuKYYxs)_2xy3|mOxC5)6@!%`_7vrATw1cCTDMdZ zijj(HYW8Ssx;Rt2LTP1Va@mzjG_x}MPMovt{kil2l4mgQxL?au)!YnSDOo{ao%!K!lt+&;XcEK56g`APWEjjVcED43p=v 
z>YKqlc7b@&XL!Xt#z3BhU;q``Q&1_Gxtyxl&~)6V{Ffmfff~#UiHX>4!nR)`sX0TXHBySEQ7%Tu~^E{Kmz?A)6IrcLQ!+;vV zBG^y&s;2N?>PIkhb#h?Qn0YK#H2_7~sq6)$Et|QU;fUavREd=@fCIo|aX1_n4-Y&Z zuSU=$5};^nXe`h~waH{u8$~I)hI$mLfewn&=rn`Hi;azqDSD=6rbcFlM#e^}BnS*8 zs1X(ti3^RWDAnkHi}D^oiEw`;EMf{UD1t>1Wed=SrOuZ>*91_*6CuBZ1P~m8#lcz5 z3!EzskE)UBfIwwZqQuZi=ijjai-WWt3i*E|g-b`3-9QtIz?-osum$ZsLTNYu{iU(1 zH56adu&}Ycx6jeEgSF^SiJhrINumQT?fOF&)n&mRJ+76)%ZI6N!@LgCV)(DRPqLhE z#~VA6oC)Wf**By8Jjns6o{wIf$_f>*ym=WBDaS8ZN$t3CKfLC@q>PxAWKOFq&P|Q| z=_jduvEzFR-TQDr*pue%1*@9IDspbp#52^MO$WqFEq5xr05L6pJ!}_u$ezn1LODv@ zn*sOS%XPHlS?65H_sA&NPLmvyy&Qe=ksT|s)pRE%bzQ4#S>$^oB$I3Jlwme}B1>}5 z$((nPZkiYEpS{MxJIN=ccZ+_JhI!lJh;Po`W5gIRS+2rRNmFv*Az8w-jm$=Nw9<*5 zXr7tyF1&Z<)#mV{vw{u_-y%#OoiSV zjZlv&6QQr*#2!!CyC*?868&Upo9lwP2g?%rQb1Ah+1g7}C!a;_T_~FJJU$)X)s%ke z7kX|GFNJ;PXMQBl|IL;=L-pd?K(2unf#ft+(eXC>-9{VX4c*BpYVrfGJ$6IkWqm75 z8WS71y>4Wm>f{|3u@l7~+XM5DmnwR$`e;TLZY4h}C>WPK4vdSMM$|g43`~w~i#FU9 z?Z&6nNG%%Eo(gpeQ%Lo=qN4RT0;=N*8YIj81O<1NisVyd%aQKZj;+Z@8B1z`H;rT6r{!!bcFWD16{VNl zJWy$p*{xH-F%#@g5IuNkb6sj3d&<7*yX9*~dw-kW93?On?1`i-Z9bSP-+5%p9qTe0 zPuGmJt?9Gsk%tewk*<}$4axBe-WYr&2G_T&K&(D&#Eg^HW#T916WX-%k_x677{qudF&+j?s`+3fJmh+tR$1l;!!cdpa2LsVYcyKEc%nT*Q9>3 z8OYNAV&$I<1?>a~6xapM8K8wOLWSNvj-=Roze^$Mc9VKi{+1e`XG01)s5 zEnU32uDTY6prH$Xzo-Lz0B{olz`@;k0zn6c_Y9vfDZ$<;5hhidZ>C898~m z?H~})6~T|#fkf_*+lAgG_kRno6+j~a41j?_umBVdfuSM17r-9y7%;HTe*}Sq!15nwLSRrn2tVTID%e^$9~waLiz=uaitWM@TrcJARkV&yZR-X1OF+PG{u2izaQk)+ z8T4N{z-#%hfR6x%78O%4nT>pXN0ES?y`r{=_`y@oLQkd^&y_62)n+QoH&M!>q2+ zE-1^?CmHQqZ-`8%3qC%}h%My-=ify6atEa-hvEl;j4}UAj{CIWJH;Du@Vi$>?Pdd8 zhY!h!R|PW!!9j3X1)fY@JlwPnmkoT_(Q#7IO05#xh)gJ# zDUJ1yf=JFWcCdmR3s-AIfBCJqidb>pohV9PAnU(-N20qN9g3+cXfNae^9vhJ3eOg| zKHHq6Sei;sdoAG83KO0{_9h=Tesg(`Qr+wK(7dE!ETVBqKvC2BFuk|;xQwubFeab9(L%`fPe+_t#@BkySEfSP4dyStM)*lzYY&5N8Z|G0AT^pTjaw#g#9vq<^!5gt5U+T|NsF@Pza7)fkp-qF~ z=qSbU+(W?z+|J*+9LPE7Xpy!@DMkQMr}CrU`n_1!?${eGkZkJXavI+G-sw8`XfvGk zM*hOWg}%Njd!ByVrkYd0%lwPYb)gk#D>fQp$H#S;wqject=XIiAHAU!R^Hq3oc2&@ 
zZd3cqfRQSi9Ce`YAsP@$2%j8NQk(fYQm1{G;01Gs+Nz{?F0vrzg}6B3W45$b=TC#r z*s82k{UZm8R(3ZoO+1L%6XyA+q59`E?(%X-lzzho)gn1FOgAgz9QLNUWY->={hLB! z=wx*&mC|gL7O1PzDTx|CpUlJ{$D3bt_y59dO#seG-PZPfe&3Vy!XEV^dfKkW)yuCa z-v%em8E-O;DrvrSq$I_;bAB-V`bnbYSwFv9u?j9jlU?+nDb|qpgqM;`EdiHxJiq## z!TL}zI|?`#Ox?XBCAqrfX%S;@R85BP9eF-X7XH(fiGl2pO#%LECT8%MdX*L`YR&5n z?fC#vxIM|+_d-yKLB#MkYDERs z1a8++GM?>WLW#fg;o@h{6<=eLmv@&Rd;ROF${A(mNgZ6mFG$2Npv-1f%dMMVd{{Dzi~oCd;ONBFXC3fkcSVD z@%_6%>xAXpjPmZ6gct@phh!v{D9oD3hS|;bp~{|}myHaWQ^{UZ)w`e5E&ZWyxcB%2 zJxB#+Ac@I%$jW%&aek!rfoFH*p3gJXdMvc5$-497(BRWz2URyxV13)bTgi4*it~@KV@x7&e=$=bEJ2I5W4f3{mq&T6lIe(^{X)c+4qtR%|>r?VyK19Q2GHPxV7M+uZTC zG(_^%>E_T0wbjwlU>l`BoR~+a`M|1$2-!`{`?~E_gv>8RdeIrNbt9LTKROxL!Q|ISu0MGV8G`?A4U38 zOG~ahi`-|GB)Bd#=HC@VBjA`F7_?IE@1APSW%&ek3oIV19A^}5_fmDDHt3JBy{2+I zj?Rpk8bsbaeX%L7_dU)&(s!SRXZ5%*a+ZS=814!CmHti-wptVq5KcT+b*SFF#{_~X zS|b|iaa)yCqJ`dyKe2qmR^Mm9b2EEi4&%FVCaun?ha5Wp*!4%h@u1IWcE{~3O-oC? zDb;X;2G0F*mE1&<4McyEV>}janVty?45n%FNk&a?t*5vr#q}JuDrY4MAG<9rZT8?d zz&FfG`eF8xM`a4jlIAX|j*7n)AL70eDS1feo4XbxL#D3c=8wG|@!!(PLObyRZ}Wag zTrPA$%$%asNS*-ZuFzI4b+i>Fp=-1XD)N3c(9fFsN_DnW-Xk|NO6pDN?f+<2=d?dv z=wn@gl2YJZqori6Mqz6%bF-M>BIWMI-a&#-XG5Fm3o#N(Vj|dP^P_fR(=Ps_sJKn! z>(5-&yZH=T}J}gvnc!RMk4DsX@ulTjC?yVNDwaaoz zqE=dWaFnOQqE-6kZ*bcFlR##5BW-=?BDOt(m~Ii>!8z$7oQXgUBj2|ad^^L zf Date: Sat, 17 Jan 2026 13:50:13 +1100 Subject: [PATCH 3/4] cleanup --- docs/file-scrapers.md | 37 ++--------------------- lib/docs/filters/powershell/clean_html.rb | 2 -- lib/docs/scrapers/powershell.rb | 10 ------ 3 files changed, 2 insertions(+), 47 deletions(-) diff --git a/docs/file-scrapers.md b/docs/file-scrapers.md index 495fa6061c..206f76630e 100644 --- a/docs/file-scrapers.md +++ b/docs/file-scrapers.md @@ -329,47 +329,14 @@ curl -o PowerShell-Docs-main.zip 'https://github.com/MicrosoftDocs/PowerShell-Do unzip PowerShell-Docs-main.zip cd PowerShell-Docs-main -## Has missing documentation, claims it can't find files from toc.yml (get-help.md etc.) 
-# pacman -S dotnet-host aspnet-runtime -# yay -S dotnet-runtime-8.0-bin # for DocFxTocGenerator -# dotnet --list-runtimes -# dotnet tool update -g docfx -# dotnet tool install DocFxTocGenerator -g -# cd PowerShell-Docs-main/reference -# DocFxTocGenerator -d reference -sr --indexing NoDefault -# docfx reference/docfx.json -o ../docs/powershell - - # strip all front matter in all Markdown files find reference -name "*.md" -type f -exec sed -i '/^---$/,/^---$/d' {} + -## create a simpified template -# cat > reference/template.html << 'EOF' -# -# -# -# -# -# - -# -# -#
-#
-# -#
-#
-# -# -# EOF + npx markdown-folder-to-html reference cp -r _reference ../docs/powershell cd .. - -# process whent on indefinatly -bundle exec thor docs:generate powershell --debug +bundle exec thor docs:generate powershell rm -rdf PowerShell-Docs-main/ rm PowerShell-Docs-main.zip diff --git a/lib/docs/filters/powershell/clean_html.rb b/lib/docs/filters/powershell/clean_html.rb index 07a43051ce..8b51696eef 100644 --- a/lib/docs/filters/powershell/clean_html.rb +++ b/lib/docs/filters/powershell/clean_html.rb @@ -10,8 +10,6 @@ def call css('.layout-body-aside').remove css('#site-user-feedback-footer').remove css('footer').remove - # docfx - css('.sideaffix').remove # markdown-folder-to-html css('#menuLink').remove css('#menu').remove diff --git a/lib/docs/scrapers/powershell.rb b/lib/docs/scrapers/powershell.rb index 6a68b1b795..8dc69fa33e 100644 --- a/lib/docs/scrapers/powershell.rb +++ b/lib/docs/scrapers/powershell.rb @@ -1,21 +1,12 @@ module Docs class Powershell < FileScraper - # class Powershell < UrlScraper self.name = 'PowerShell' self.type = 'simple' self.release = '7.5' self.base_url = 'https://learn.microsoft.com/en-us/powershell' - # self.root_path = 'scripting/overview.html' self.root_path = 'docs-conceptual/overview.html' self.initial_paths = [ - # 'scripting/toc.html', 'module/index.html', - # 'module/Microsoft.WSMan.Management/About/about_WS-Management_Cmdlets.html', - # 'module/PSWorkflow/About/about_ActivityCommonParameters.html', - # 'module/Microsoft.PowerShell.Core/About/About.html', - # 'module/PSReadLine/About/about_PSReadLine.html', - # 'module/Microsoft.PowerShell.Security/About/about_Certificate_Provider.html', - # 'module/PSScheduledJob/About/about_Scheduled_Jobs.html' ] self.links = { home: 'https://learn.microsoft.com/powershell', @@ -23,7 +14,6 @@ class Powershell < FileScraper } html_filters.push 'powershell/clean_html', 'powershell/entries' - # options[:rate_limit] = 100 # micososft docs online are ratelimited options[:skip_patterns] = 
[/\/\//] # otherwise infinately adding the same pages options[:attribution] = <<-HTML The MIT License (MIT) Copyright (c) Microsoft Corporation From 0682adda11764788d6dde3cb3c1192e255ff1a4a Mon Sep 17 00:00:00 2001 From: publicarray Date: Sat, 17 Jan 2026 16:18:46 +1100 Subject: [PATCH 4/4] fix heading --- lib/docs/filters/powershell/entries.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/docs/filters/powershell/entries.rb b/lib/docs/filters/powershell/entries.rb index d239178dc1..ed5d280d5c 100644 --- a/lib/docs/filters/powershell/entries.rb +++ b/lib/docs/filters/powershell/entries.rb @@ -2,7 +2,7 @@ module Docs class Powershell class EntriesFilter < Docs::EntriesFilter def get_name - at_css('h1')&.content || "PowerShell" + at_css('h1')&.content&.chop&.chop || "" # drop the trailing ' #'; &. on every call so a page without an <h1> yields "" instead of raising NoMethodError end def get_type @@ -18,7 +18,7 @@ def get_type when /^7\.6/ '7.6' else - 'Module' + 'Manual' end end end