Sunday, February 14, 2010

Prepare files for upload to SharePoint

'______________________________________________________________________________________________
'++++++++++++++++++++ File Digester for SharePoint Document Library Loading ++++++++++++++++++++
'++++++++++++++++++++ File Digester for SharePoint Document Library Loading ++++++++++++++++++++
'++++++++++++++++++++ File Digester for SharePoint Document Library Loading ++++++++++++++++++++
'+
'+
'+ +++++ Usage Summary +++++
'+ Allows a source root folder to be specified (in CONFIG Section below) and have all files and sub-folders' files
'+ to be crawled. Every file found is checked for:
'+ 1) SharePoint-disallowed characters in the filename.
'+ 2) barred filetype extensions (i.e. the OoTB ones for SharePoint)
'+ 3) filename length longer than 128 characters (i.e. SP's max allowed).
'+
'+ You must specify the destination folder for the "cleaned-upu" files to be COPIED to. I.e. the original's are left untouched.
'+ You can also specify if you want the outputted copies to be broken into subfolders.
'+ If you do so, you need to specify what total size you want to aim for for each sub-folder.
'+
'+ See Deron for more details if need be.
'+
'+ '+++++ Revision History +++++
'+
'+ 2010-02-13 (21:45MDT) Deron S. Dilger Original Version
'+
'+
'+
'______________________________________________________________________________________________




Option Explicit

Dim intDebugLevel
Dim DisallowedCharArray
Dim SubstitutionChar
Dim DisallowedExtensionsArray
Dim SubstitutionExtension
Dim strSourceRootPath
Dim strDestRootPath
Dim CurrentPath
Dim strCurrentDestPath
Dim CopyResult
Dim intDocIDStart
Dim intFilesFoundCounter
Dim intFoldersFoundCounter
Dim intBadCharsCounted
Dim intBadExtCounted
Dim intTooLongNamesCounted
Dim MsgOut
Dim CurrentPrefix
Dim intMaxFileCount
Dim blnOutputDestSubs
Dim MaxOutputFolderSizeMB
Dim objShell
Dim objFSO
Dim objLogTS
Dim intUserResponse
Dim objSrcRootFolder
Dim objDestRootFolder

Dim FileDigestionLogPath
Dim aItem 'the object (file or folder) found by the recursive FSO crawl

'--- Set counters to zero
intFilesFoundCounter = 0
intFoldersFoundCounter = 0
intBadCharsCounted = 0
intBadExtCounted = 0
intTooLongNamesCounted = 0

'______________________________________________________________________________________________
'++++++++++ CONFIGURATION AREA ++++++++++++++++++++++++++++++
'++++++++++ CONFIGURATION AREA ++++++++++++++++++++++++++++++
'++++++++++ CONFIGURATION AREA ++++++++++++++++++++++++++++++

intDebugLevel = 0 '0=off, 1 = low verbosity, 2 = medium, 3 = high

'--- SharePoint doesn't allow the following characters in files uploaded to docLib.../ \ : * ? " < > | # { } % ~ &
'--- but since WinXP doesn't allow the following characters \/:*?"<>|
'--- we'll only check for # {}%~&
DisallowedCharArray = Array("#", Chr(9), "{", "}", "*", "~", "&")
SubstitutionChar = "^"

'---
DisallowedExtensionsArray = LoadDisallowedExtensionsArray() 'see the function to modify which extensions are corrected
SubstitutionExtension = ".XYZ"

'strSourceRootPath = "C:\PRRIP Library Digester"
strSourceRootPath = "C:\PRRIP Files"
strDestRootPath = "C:\PRRIP Library Digested For SP Uploader"

FileDigestionLogPath = strDestRootPath & "\^^^FileDigestion.log"

intDocIDStart = 0 'this will be incremented the first time into the loop so start one less than the desired first ID
intMaxFileCount = 5000
blnOutputDestSubs = True
MaxOutputFolderSizeMB = 180

'______________________________________________________________________________________________
'++++++++++ CODE AREA ++++++++++++++++++++++++++++++
'++++++++++ CODE AREA ++++++++++++++++++++++++++++++
'++++++++++ CODE AREA ++++++++++++++++++++++++++++++

intFilesFoundCounter = 0
intFoldersFoundCounter = 0

Set objShell = WScript.CreateObject ("WScript.Shell")
'### Set objFolder = objShell.Namespace (folderPath)
Set objFSO = WScript.CreateObject("Scripting.FileSystemObject")
Set objSrcRootFolder = objFSO.GetFolder(strSourceRootPath)

'--- See if our output/destination folder exists and if not, create it.
If objFSO.FolderExists(strDestRootPath) Then
intUserResponse = objShell.PopUp("Destination Folder" & vbCrLf & "'" & strDestRootPath & "'" & vbCrLf & " already exists!", ,"Proceed with Pre-Existing Destination Folder?", 52)
If intUserResponse = 7 Then
WScript.Quit
Else
Set objDestRootFolder = objFSO.GetFolder(strDestRootPath)
End If
Else
Set objDestRootFolder = objFSO.CreateFolder(strDestRootPath)
End If
'--- set our current output/destination folder as the root destination folder to start
strCurrentDestPath = strDestRootPath

'--- create the output log to record our file digesting moves into
If objFSO.FileExists(FileDigestionLogPath) Then
intUserResponse = objShell.PopUp("Digestion Log" & vbCrLf & "'" & FileDigestionLogPath & "'" & vbCrLf & " already exists!", ,"Append to Pre-Existing File Digestion Log?", 52)
If intUserResponse = 7 Then
WScript.Quit
End If
End If
Set objLogTS = objFSO.OpenTextFile(FileDigestionLogPath, 8, True)

objLogTS.WriteBlankLines(3)
objLogTS.WriteLine("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++")
objLogTS.WriteLine("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++")
objLogTS.WriteLine("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++")
objLogTS.WriteLine("File Digestion Started at " & Now)
objLogTS.WriteBlankLines(3)
objLogTS.WriteLine("!X! in a new filename indicates an intentionally truncated filename.")
objLogTS.WriteLine("^ in a new filename indicates an intentionally substituted character in the new filename.")
objLogTS.WriteLine("a double-extension filename indicates an intentionally added 'safe' extension for the filename.")
objLogTS.WriteLine("Field header row follows after three blank lines.....")
objLogTS.WriteBlankLines(3)
'--- Write the field header line to the log
objLogTS.WriteLine("DocID" & Chr(9) & "OutputPath" & Chr(9) & "OriginalPathAndName" & Chr(9) & "NewFileName")


'--- create the first sub-folder in the output folder if subs are desired
If blnOutputDestSubs Then
strCurrentDestPath = strCurrentDestPath & "\" & CStr(intDocIDStart+1)
'### WScript.echo("Check for existance of " & strCurrentDestPath & vbcrlf & " is:" & CStr(objFSO.FolderExists(strCurrentDestPath)))
If Not objFSO.FolderExists(strCurrentDestPath) Then
objFSO.CreateFolder(strCurrentDestPath)
End If
End If


'--- enter the recursive function to crawl the source folder's subfolders and rename/relocate all found files.
getInfo(objSrcRootFolder)

'--- build the final msg

MsgOut = "File Digestion Ended at " & Now & vbCrLf
MsgOut = MsgOut & "All done!" & vbCrLf & intFilesFoundCounter & " Files found in root folder plus " & intFoldersFoundCounter & " subfolders." & vbCrLf
MsgOut = MsgOut & "Bad Characters Removed = " & intBadCharsCounted & vbCrLf
MsgOut = MsgOut & "Bad Extensions Stuffed = " & intBadExtCounted & vbCrLf
MsgOut = MsgOut & "Overly-long Filenames Chopped = " & intTooLongNamesCounted & vbCrLf

WScript.Echo(MsgOut)

objLogTS.WriteBlankLines(3)
objLogTS.WriteLine(MsgOut)
objLogTS.Close
WScript.Quit

'+++ Some brainscribbles....
'### objFSO.MoveFile origFullName, newFullName

'______________________________________________________________________________________________

'++++++++++ FUNCTIONS AREA ++++++++++++++++++++++++++++++
'++++++++++ FUNCTIONS AREA ++++++++++++++++++++++++++++++
'++++++++++ FUNCTIONS AREA ++++++++++++++++++++++++++++++

Function LoadDisallowedExtensionsArray
'### .List of disallowed file extensions:
Dim TempArray
TempArray = Array(_
".ade",_
".adp",_
".app",_
".asa",_
".ash",_
".asm",_
".asp",_
".bas",_
".bat",_
".cdx",_
".cer",_
".chm",_
".class",_
".cmd",_
".cnt",_
".com",_
".config",_
".cpl",_
".crt",_
".csh",_
".der",_
".dll",_
".exe",_
".fxp",_
".gadget",_
".hlp",_
".hpj",_
".hta",_
".htr",_
".htw",_
".ida",_
".idc",_
".idq",_
".ins",_
".isp",_
".its",_
".jse",_
".ksh",_
".lnk",_
".mad",_
".maf",_
".mag",_
".mam",_
".maq",_
".mar",_
".mas",_
".mat",_
".mau",_
".mav",_
".maw",_
".mda",_
".mdb",_
".mde",_
".mdt",_
".mdw",_
".mdz",_
".msc",_
".msh",_
".msh1",_
".msh1xml",_
".msh2",_
".msh2xml",_
".mshxml",_
".msi",_
".msp",_
".mst",_
".ops",_
".pcd",_
".pif",_
".prf",_
".prg",_
".printer",_
".pst",_
".reg",_
".rem",_
".scf",_
".scr",_
".sct",_
".shb",_
".shs",_
".shtm",_
".shtml",_
".soap",_
".stm",_
".url",_
".vb",_
".vbe",_
".vbs",_
".ws",_
".wsc",_
".wsf",_
".wsh"_
)
LoadDisallowedExtensionsArray = TempArray
End Function
'______________________________________________________________________________________________


Function getInfo(pCurrentDir)
CurrentPath = objFSO.GetAbsolutePathName(pCurrentDir)
For Each aItem In pCurrentDir.Files
intFilesFoundCounter = intFilesFoundCounter + 1
CurrentPrefix = CStr(intFilesFoundCounter + intDocIDStart) & "-"
Dim OrigFilename
Dim NewFilename
OrigFileName = CStr(aItem.Name)
NewFileName = OrigFileName
NewFileName = CleanUpBadChars(NewFileName)
NewFileName = ChangeBadExtensions(NewFileName)
NewFileName = ChopNameLength(NewFileName, CurrentPrefix)

'--- debug info output?
If intDebugLevel > 1 Then
WScript.Echo ("File Found = " & intFilesFoundCounter & vbCrLf &_
"Current Dir: " & CurrentPath & vbCrLf & "Current File: " &_
aItem.Name & vbCrLf & "Relocated File's filename will be: " & NewFileName)
End If


'--- Move the files
CopyResult = CopySPSafeFilename((CurrentPath & "\" & OrigFileName), NewFileName)

'--- write the file modification info to the log
objLogTS.WriteLine(intFilesFoundCounter + intDocIDStart & Chr(9) & CurrentPath & Chr(9) & OrigFileName & Chr(9) & NewFileName)

If (intFilesFoundCounter > intMaxFileCount) Or (intFilesFoundCounter = intMaxFileCount) Then
Exit For
End If
Next

For Each aItem In pCurrentDir.SubFolders
intFoldersFoundCounter = intFoldersFoundCounter + 1

'--- if we haven't hit our stoppoint for files, keep digging.
If (intFilesFoundCounter > intMaxFileCount) Or (intFilesFoundCounter = intMaxFileCount) Then
Exit For
Else
If intDebugLevel > 1 Then
WScript.Echo ("Found sub-folder #: " & intFoldersFoundCounter & vbCrLF &_
"We are recursively passing thru sub-folder" & vbCrLf & aItem.Name)
End If
getInfo(aItem)
End If

Next
End Function
'______________________________________________________________________________________________


Function CleanUpBadChars(FileToCheck)
'--- SharePoint doesn't allow the following characters in files uploaded to docLib.../ \ : * ? " < > | # { } % ~ &
'--- but since WinXP doesn't allow the following characters \/:*?"<>|
'--- we'll only check for # {}%~&
'--- and the filename has to be shorter than 128 characters

'---count how many substitution chars we already have (by subtracting from the counter will use for bad ones below
Dim intBCCL
Dim NewBCCount
For intBCCL = 1 To Len(FileToCheck)
If Mid(FileToCheck, intBCCL, 1) = SubstitutionChar Then
NewBCCount = NewBCCount - 1
End If
Next

'--- now loop through the array of disallowed characters and replace any that are found in the filename
Dim NameCheckLoopCounter
For NameCheckLoopCounter = LBound(DisallowedCharArray) To UBound(DisallowedCharArray)
If intDebugLevel > 1 Then
WScript.Echo ("Checking " & vbCrLf & FileToCheck & vbCrLf & "for " & DisallowedCharArray(NameCheckLoopCounter))
End If
FileToCheck = Replace(FileToCheck, DisallowedCharArray(NameCheckLoopCounter), SubstitutionChar, 1)
Next

'---count how many bad chars we replaced
For intBCCL = 1 To Len(FileToCheck)
If Mid(FileToCheck, intBCCL, 1) = SubstitutionChar Then
NewBCCount = NewBCCount + 1
End If
Next
intBadCharsCounted = intBadCharsCounted + NewBCCount

CleanUpBadChars = FileToCheck
End Function
'______________________________________________________________________________________________


Function ChangeBadExtensions(FileToCheck)
'--- SharePoint blocks file uploads OoTB for files with certain extensions.
'--- and the filename has to be shorter than 128 characters
Dim NameCheckLoopCounter
For NameCheckLoopCounter = LBound(DisallowedExtensionsArray) To UBound(DisallowedExtensionsArray)
If intDebugLevel > 3 Then
WScript.Echo ("Checking " & vbCrLf & FileToCheck & vbCrLf & "for " & DisallowedExtensionsArray(NameCheckLoopCounter))
End If
'FileToCheck = Replace(FileToCheck, DisallowedExtensionsArray(NameCheckLoopCounter), SubstitutionExtension, 1)
If Right(FileToCheck, Len(DisallowedExtensionsArray(NameCheckLoopCounter))) = DisallowedExtensionsArray(NameCheckLoopCounter) Then
'--- Use the following equation to REPLACE the bad extension...
'FileToCheck = Left(FileToCheck, (Len(FileToCheck) - Len(DisallowedExtensionsArray(NameCheckLoopCounter)))) & SubstitutionExtension
'--- or use the following equation to tack the substitution extension on the end.
FileToCheck = FileToCheck & SubstitutionExtension
intBadExtCounted = intBadExtCounted + 1
End If
Next
ChangeBadExtensions = FileToCheck
End Function
'______________________________________________________________________________________________


Function ChopNameLength(FileToCheck, pCurrentPrefix)
If Len(FiletoCheck) > (128-Len(pCurrentPrefix)) Then
FileToCheck = "!X!" & Right(FileToCheck,((128-Len(pCurrentPrefix))-3) )
intTooLongNamesCounted = intTooLongNamesCounted + 1
End If
ChopNameLength = pCurrentPrefix & FileToCheck
End Function
'______________________________________________________________________________________________


Function CopySPSafeFilename(pOrigFPath, pNewFName)
Dim objCurrentOutputFolder
Dim strCalcDestFilePath

If blnOutputDestSubs Then
Set objCurrentOutputFolder = objFSO.GetFolder(strCurrentDestPath)
'### WScript.Echo("size of " & strCurrentDestPath & " in MB is: " & (objCurrentOutputFolder.Size / 1048576 ) )
If (objCurrentOutputFolder.Size / 1048576) > MaxOutputFolderSizeMB Then 'math of output folder size if for MegaBytes,
'###If (objCurrentOutputFolder.Size / 1024) > MaxOutputFolderSizeMB Then 'math of output folder size if for KILOBytes, not MB (for testing)
'create a new folder and change workingoutputpath
strCurrentDestPath = strDestRootPath & "\" & CStr(intFilesFoundCounter + intDocIDStart)
'### WScript.echo("Check for existance of " & strCurrentDestPath & vbcrlf & " is:" & CStr(objFSO.FolderExists(strCurrentDestPath)))
If Not objFSO.FolderExists(strCurrentDestPath) Then
objFSO.CreateFolder(strCurrentDestPath)
End If
End If
End If

strCalcDestFilePath = strCurrentDestPath & "\" & pNewFName

If intDebugLevel > 0 Then
WScript.Echo("Moving......" & vbCrLf & pOrigFPath & vbCrLf & "To" & vbCrLf & strCalcDestFilePath)
End If


objFSO.CopyFile pOrigFPath, strCalcDestFilePath, False
CopySPSafeFilename = 0
End Function

1 comment:

Anonymous said...

Hi Deron, Very cool post. This was exactly what I needed since we are planning to migrate a lot of data to sharepoint. My only question is how do you get the "prepared" files to sit in the same directory structure as the source?