Custom Tool to build protobuf files in Visual Studio

Here is the source for a Visual Studio add-in that compiles protobuf files directly to C# source.
This is a great starting point for using an external compiler as a code generator that fits directly into visual studio.
This allows the definition of a Custom Tool that creates a code behind file that is automatically regenerated every time the top level item is saved.
It does require references to some items in the VS.NET SDK.
The error handling is fairly simplistic: Errors are embedded in place of the generated code.
The resulting dll needs to be registered for COM.  The registration code is as minimal as possible – even having the attribute use a constant.
using System;
using System.Globalization;
using System.IO;
using System.Runtime.InteropServices;
using System.Text;
using System.Text.RegularExpressions;
using Microsoft.VisualStudio.TextTemplating.VSHost;
using Microsoft.Win32;
using System.Diagnostics;
namespace ProtogenGenerator
{
[Guid(CustomGuidString)]
[ComVisible(true)]
public class MinimalTool : BaseCodeGeneratorWithSite
{
// The GUID is declared once as a constant so that the [Guid] attribute on the class and the
// COM registration routines are guaranteed to use the same value.
#warning Replace this with your own guid…
public const string CustomGuidString = "99C3D237-70D3-498c-BD54-CD108CC5E82A";
// Guid form of CustomGuidString; written to the registry as the tool's CLSID.
private static readonly Guid CustomToolGuid = new Guid("{" + CustomGuidString + "}");
private const string CustomToolName = "ProtogenGenerator"; // Need to update this…
private const string CustomToolDescription = "Generates ProtoGen"; // Need to update this…
// File extension that is associated with this custom tool at registration time.
private const string SourceFileExtension = ".proto";
// Lines received on the external tool's standard error stream.
private StringBuilder _errorBuffer = new StringBuilder();
// Lines received on the external tool's standard output stream.
private StringBuilder _outputBuffer = new StringBuilder();
// Extracts the quoted value of an "umbrella_classname = "..." ;" option from a .proto file.
// The capture stops at the first closing quote so trailing text on the same line is not swallowed.
private static readonly Regex UmbrellaClassNameRegex =
    new Regex("umbrella_classname\\s*=\\s*\"([^\"]*)\"\\s*;");

/// <summary>
/// Generates the C# code-behind for a .proto file by running protoc.exe (to produce a
/// descriptor set) and then protogen.exe (to produce C# from that descriptor set). Both
/// executables are expected in the folder that contains this assembly.
/// </summary>
/// <param name="inputFileName">Path of the .proto file being processed.</param>
/// <param name="inputFileContent">Content supplied by the host; not used, the tools read the file from disk.</param>
/// <returns>
/// The bytes of the generated .cs file, or - when a step fails - an ASCII-encoded diagnostic
/// message that ends up in place of the generated code.
/// </returns>
protected override byte[] GenerateCode(string inputFileName, string inputFileContent)
{
    string rootName = Path.GetFileNameWithoutExtension(inputFileName);
    string inputPath = Path.GetDirectoryName(inputFileName);
    string toolFolder = Path.GetDirectoryName(typeof(MinimalTool).Assembly.Location);
    string pbFileName = Path.Combine(toolFolder, rootName + ".pb");

    // Step 1: protoc compiles the .proto (plus the option definitions it imports) into a descriptor set.
    string protocPath = Path.Combine(toolFolder, "protoc.exe");
    StringBuilder arguments = new StringBuilder();
    arguments.AppendFormat(" --proto_path=\"{0};{1}\"", toolFolder, inputPath);
    arguments.AppendFormat(" --descriptor_set_out=\"{0}\"", pbFileName);
    arguments.AppendFormat(" \"{0}\"", Path.Combine(toolFolder, @"google\protobuf\descriptor.proto"));
    arguments.AppendFormat(" \"{0}\"", Path.Combine(toolFolder, "csharp_options.proto"));
    arguments.AppendFormat(" \"{0}\"", inputFileName);

    int result = RunExternalTool(protocPath, toolFolder, arguments.ToString(), false);
    if (result != 0)
    {
        StringBuilder results = new StringBuilder();
        results.AppendLine("There was a problem with the protoc compiler.");
        results.AppendLine(protocPath);
        results.AppendLine(arguments.ToString());
        results.Append(_errorBuffer.ToString());
        return Encoding.ASCII.GetBytes(results.ToString());
    }

    // Step 2: protogen turns the descriptor set into C# source, written into the tool folder.
    // The path is quoted and never used as a format string, so spaces or braces in it are safe.
    string protogenPath = Path.Combine(toolFolder, "protogen.exe");
    string protogenArguments = " \"" + pbFileName + "\"";

    result = RunExternalTool(protogenPath, toolFolder, protogenArguments, true);
    if (result != 0)
    {
        StringBuilder results = new StringBuilder();
        results.AppendFormat("There was a problem with the protogen compiler {0}", result);
        results.AppendLine();
        results.AppendLine(protogenPath);
        results.AppendLine(protogenArguments);
        results.AppendLine("Error:");
        results.AppendLine(_errorBuffer.ToString());
        results.AppendLine("Output:");
        results.Append(_outputBuffer.ToString());
        return Encoding.ASCII.GetBytes(results.ToString());
    }

    // Step 3: work out the name of the generated .cs file.
    // 1. It can be specified in the .proto via
    //      option (google.protobuf.csharp_file_options).umbrella_classname = "PositionBuffer";
    // 2. Otherwise it is the file name without extension converted to PascalCase, where
    //    punctuation is removed and both punctuation and digits start a new word.
    string sourceContent = File.ReadAllText(inputFileName);
    Match umbrella = UmbrellaClassNameRegex.Match(sourceContent);
    string outputNamePrefix = umbrella.Success
        ? umbrella.Groups[1].Value
        : UnderscoresToPascalOrCamelCase(rootName, true);

    if (string.IsNullOrEmpty(outputNamePrefix))
    {
        return Encoding.ASCII.GetBytes("Unable to determine the umbrella_classname");
    }

    // Plain concatenation (not Path.Combine) so an odd character captured from the .proto
    // ends in the "unable to find" message below rather than in an exception.
    string outputFileName = toolFolder + Path.DirectorySeparatorChar + outputNamePrefix + "Description.cs";
    if (!File.Exists(outputFileName))
    {
        // Try again with the plain umbrella class name.
        outputFileName = toolFolder + Path.DirectorySeparatorChar + outputNamePrefix + ".cs";
        if (!File.Exists(outputFileName))
        {
            return Encoding.ASCII.GetBytes("Unable to find file " + outputFileName);
        }
    }

    byte[] resultBuffer = File.ReadAllBytes(outputFileName);

    // Tidy up the intermediate files - only when the whole compile worked, so that they
    // remain available for inspection after a failure.
    File.Delete(pbFileName);
    File.Delete(outputFileName);
    return resultBuffer;
}

/// <summary>
/// Runs an external command-line tool without a window, waits for it to finish and returns its
/// exit code. Standard error (and optionally standard output) is collected into the instance
/// buffers, which are cleared first so text from an earlier run is never reported again.
/// </summary>
/// <param name="executablePath">Full path of the executable to start.</param>
/// <param name="workingDirectory">Working directory for the child process.</param>
/// <param name="arguments">Command-line arguments, already quoted where necessary.</param>
/// <param name="captureStandardOutput">True to also redirect and collect standard output.</param>
/// <returns>The exit code of the process.</returns>
private int RunExternalTool(string executablePath, string workingDirectory, string arguments, bool captureStandardOutput)
{
    _errorBuffer = new StringBuilder();
    _outputBuffer = new StringBuilder();

    ProcessStartInfo psi = new ProcessStartInfo(executablePath);
    psi.Arguments = arguments;
    psi.CreateNoWindow = true;
    psi.WorkingDirectory = workingDirectory;
    psi.WindowStyle = ProcessWindowStyle.Hidden;
    psi.RedirectStandardError = true;
    psi.RedirectStandardOutput = captureStandardOutput;
    psi.UseShellExecute = false; // required for stream redirection

    using (Process proc = Process.Start(psi))
    {
        proc.ErrorDataReceived += ProcErrorDataReceived;
        if (captureStandardOutput)
        {
            proc.OutputDataReceived += ProcOutputDataReceived;
        }
        try
        {
            proc.BeginErrorReadLine();
            if (captureStandardOutput)
            {
                proc.BeginOutputReadLine();
            }
            proc.WaitForExit();
            return proc.ExitCode;
        }
        finally
        {
            // Detach even if waiting throws, so the handlers never outlive this call.
            proc.ErrorDataReceived -= ProcErrorDataReceived;
            if (captureStandardOutput)
            {
                proc.OutputDataReceived -= ProcOutputDataReceived;
            }
        }
    }
}
// This was taken from the FileDescriptor
/// <summary>
/// Converts a name such as "foo_bar" to PascalCase ("FooBar") or camelCase ("fooBar").
/// Only ASCII letters and digits are kept; every other character is dropped. A dropped
/// character or a digit causes the next lower-case letter to be capitalised.
/// </summary>
/// <param name="input">The name to convert.</param>
/// <param name="pascal">True to capitalise the first letter, false to force it to lower case.</param>
/// <returns>The converted name.</returns>
private static string UnderscoresToPascalOrCamelCase(string input, bool pascal)
{
    StringBuilder result = new StringBuilder(input.Length);
    bool capitaliseNext = pascal;
    for (int i = 0; i < input.Length; i++)
    {
        char c = input[i];
        if ('a' <= c && c <= 'z')
        {
            result.Append(capitaliseNext ? char.ToUpper(c, CultureInfo.InvariantCulture) : c);
            capitaliseNext = false;
        }
        else if ('A' <= c && c <= 'Z')
        {
            // Only the very first letter is ever forced to lower case (camelCase);
            // capital letters after the first are left as-is.
            bool forceLower = i == 0 && !pascal;
            result.Append(forceLower ? char.ToLower(c, CultureInfo.InvariantCulture) : c);
            capitaliseNext = false;
        }
        else
        {
            // Digits are kept, punctuation is dropped; both start a new word.
            if ('0' <= c && c <= '9')
            {
                result.Append(c);
            }
            capitaliseNext = true;
        }
    }
    return result.ToString();
}
/// <summary>
/// Appends one line received from the child process's standard output to the output buffer.
/// </summary>
void ProcOutputDataReceived(object sender, DataReceivedEventArgs e)
{
    // Data is null when the redirected stream is closed; skip it so that the
    // end-of-stream notification does not add an empty line to the buffer.
    if (e.Data != null)
    {
        _outputBuffer.AppendLine(e.Data);
    }
}
/// <summary>
/// Appends one line received from the child process's standard error to the error buffer.
/// </summary>
void ProcErrorDataReceived(object sender, DataReceivedEventArgs e)
{
    // Data is null when the redirected stream is closed; skip it so that the
    // end-of-stream notification does not add an empty line to the buffer.
    if (e.Data != null)
    {
        _errorBuffer.AppendLine(e.Data);
    }
}
/// <summary>
/// Returns the file extension given to the generated code-behind file.
/// </summary>
/// <returns>Always ".cs".</returns>
public override string GetDefaultExtension()
{
    return ".cs";
}
#region Registration
// Generator category that the tool is registered under (used for C# projects).
private static readonly Guid CSharpCategory =
    new Guid("{FAE04EC1-301F-11D3-BF4B-00C04F79EFBC}");
// Registry path template below HKEY_LOCAL_MACHINE:
// {0} = Visual Studio version, {1} = category GUID in braces, {2} = tool name or file extension.
private const string KeyFormat
    = @"SOFTWARE\Microsoft\VisualStudio\{0}\Generators\{1}\{2}";
/// <summary>
/// Writes the registry entries that make the tool known to one Visual Studio version:
/// a key named after the tool (description, CLSID, GeneratesDesignTimeSource) and a key
/// named after the source file extension whose default value is the tool name.
/// </summary>
/// <param name="vsVersion">Visual Studio version whose registry hive section is written.</param>
/// <param name="categoryGuid">Generator category to register under.</param>
protected static void Register(Version vsVersion, Guid categoryGuid)
{
    string category = categoryGuid.ToString("B");

    string toolKeyPath = String.Format(KeyFormat, vsVersion, category, CustomToolName);
    using (RegistryKey key = Registry.LocalMachine.CreateSubKey(toolKeyPath))
    {
        key.SetValue("", CustomToolDescription);
        key.SetValue("CLSID", CustomToolGuid.ToString("B"));
        key.SetValue("GeneratesDesignTimeSource", 1);
    }

    // This automates the association of the source file extension with this tool.
    string extensionKeyPath = String.Format(KeyFormat, vsVersion, category, SourceFileExtension);
    using (RegistryKey key = Registry.LocalMachine.CreateSubKey(extensionKeyPath))
    {
        key.SetValue("", CustomToolName);
    }
}
/// <summary>
/// Removes the registry keys created by Register for one Visual Studio version.
/// Keys that do not exist are ignored.
/// </summary>
/// <param name="vsVersion">Visual Studio version whose entries are removed.</param>
/// <param name="categoryGuid">Generator category the tool was registered under.</param>
protected static void Unregister(Version vsVersion, Guid categoryGuid)
{
    string category = categoryGuid.ToString("B");

    // The second argument (false) tells DeleteSubKey not to throw when the key is missing.
    string toolKeyPath = String.Format(KeyFormat, vsVersion, category, CustomToolName);
    Registry.LocalMachine.DeleteSubKey(toolKeyPath, false);

    string extensionKeyPath = String.Format(KeyFormat, vsVersion, category, SourceFileExtension);
    Registry.LocalMachine.DeleteSubKey(extensionKeyPath, false);
}
/// <summary>
/// COM registration hook: registers the tool for C# under each supported Visual Studio version.
/// </summary>
/// <param name="t">The type being registered; not used.</param>
[ComRegisterFunction]
public static void RegisterClass(Type t)
{
    // Visual Studio versions 8.0, 9.0 and 10.0 (C#)
    foreach (int majorVersion in new int[] { 8, 9, 10 })
    {
        Register(new Version(majorVersion, 0), CSharpCategory);
    }
}
/// <summary>
/// COM unregistration hook: removes the tool's C# registration for each supported Visual Studio version.
/// </summary>
/// <param name="t">The type being unregistered; not used.</param>
[ComUnregisterFunction]
public static void UnregisterClass(Type t)
{
    // Visual Studio versions 8.0, 9.0 and 10.0 (C#)
    foreach (int majorVersion in new int[] { 8, 9, 10 })
    {
        Unregister(new Version(majorVersion, 0), CSharpCategory);
    }
}
#endregion
}
}

 

Scalable Codegen Process: Application Miniature

I have been working on code generation lately and have finally made a breakthrough that will make life easier for most projects. I have been following the techniques found in Kathleen Dollard’s (KD) “Code Generation in .NET” book.
The intent of the book was to demonstrate code generation techniques with a sample generator.  A lot of people have gone with her generator as if it were a product.  This misses the point – the generator is easy to write; it is the base classes, templates and techniques that are important.  The book was about the process, not the product!

I have gone for an even more bare-bones generator than KD.

My transformation process goes:

model.codegen  + template – XSLT ->  batch file

batch file runs a series of:

model.codegen + transform + parameters -> generated code/scripts &c

The big thing is that all generation comes from one document via xslt.

So far I am generating:

  • Table creation scripts
  • Data access layer
  • Stored Procedures
  • Model to dal mapping code

I am planning on generating:

  • Model
  • View
  • Controller
  • View to Model Mapping code

The beauty of this is that it all uses XSLT and requires only one custom exe. I have also found that performing the transform as a single match makes it easier to read the transform.  That is, don’t call other templates – keep it all in one complex document.

The real trick is to develop an “application miniature”.
This is a small application that has one or two screens that demonstrate each of the classes that will be used in the finished application.  This is handcrafted.  This is used to create and fix the templates and the metadata document.
You may need to adjust it to match the slightly weaker coding standards that must be applied to XSLT.

You create this small application that provides just enough functionality to demonstrate the principles involved.  This may only have a handful of model classes.  New techniques can be experimented with easily and, if they generate well, they can be rolled out to the full application.  This has the benefit of making the entire application consistent.  There will be none of the “we experimented with that but were stuck with it” code.  The best idea is to have no exceptions – everything is one of the standard options. Of course you can add handcrafted code – but this must be for the truly exceptional cases.

The big stumbling block is the application framework (that is how to solve the problem). If you have an application then you should be able to extract a framework from the best parts.  If not look for a framework that solves your problems or build or buy one.  You must understand the template, the generation process and your framework.  If something does not fit then change it.  This allows new ideas to be put into production rapidly.

These techniques work very well when combined with a very pure MVC or MVP framework.  If you avoid explicit duplication in your code this will make your life very easy.

Another book on codegen

This is a book about code generation.

From the two chapters that are publicly available it seems that he knows what he is talking about.

My current take on codegen is to simply use xslt.

I have an extended xslt utility that I use.

The syntax is:

xslt source transform target [name value]*

This allows one root document to be used to create a batch file that generates the used code from the one core document.

It has a few other tricks (handling of header comments and logging changes), but that is about it.
It does include some string  manipulation extensions.