Thoughts on the current state of ASN.1 and XML technologies.

Re-using Decoded Items in a Subsequent Encoding

This blog post attempts to provide advice about re-using items from a decoded message in a subsequent encoding of a different message.

Let’s look at the employee sample in the c/sample_ber/employee directory of the ASN1C SDK. The ASN.1 specification for this sample is fairly simple and looks like this:


Employee DEFINITIONS ::= BEGIN
EXPORTS;

PersonnelRecord ::= [APPLICATION 0] IMPLICIT SET {
   Name,
   title [0] IA5String,
   number EmployeeNumber,
   dateOfHire [1] Date,
   nameOfSpouse [2] Name,
   children [3] IMPLICIT SEQUENCE OF ChildInformation
}

ChildInformation ::= SET {
   Name,
   dateOfBirth [0] Date
}

Name ::= [APPLICATION 1] IMPLICIT SEQUENCE {
   givenName IA5String,
   initial IA5String,
   familyName IA5String
}

EmployeeNumber ::= [APPLICATION 2] IMPLICIT INTEGER

Date ::= IA5String

END

Now, suppose the ChildInformation piece and the Name piece need to be used in a different message, called a FamilyRecord, that is going to be encoded after a PersonnelRecord message is decoded. We can change the ASN.1 specification that defines the PersonnelRecord so it looks like this:


Employee DEFINITIONS AUTOMATIC TAGS ::= BEGIN

EXPORTS;

IMPORTS Name, ChildInformation FROM Common;

PersonnelRecord ::= SET {
   employeeName Name,
   title IA5String,
   number EmployeeNumber,
   dateOfHire Date,
   nameOfSpouse Name,
   children SEQUENCE OF ChildInformation
}

EmployeeNumber ::= INTEGER

Date ::= IA5String

END

So we can see now that instead of defining ChildInformation and Name, the specification imports them from a module named Common. The other changes are that we are using an explicit element name called employeeName just to make things neater, and we are using AUTOMATIC TAGS for sanity preservation.

The Common module would look like this:


Common DEFINITIONS AUTOMATIC TAGS ::= BEGIN

-- Types shared by the Employee and Family modules.
EXPORTS ChildInformation, Name;

ChildInformation ::= SET {
   childName Name,
   dateOfBirth Date
}

Name ::= SEQUENCE {
   givenName IA5String,
   initial IA5String,
   familyName IA5String
}

-- ChildInformation references Date, so Date has to be defined (or imported)
-- in this module; it is defined locally here so that Common stands alone.
Date ::= IA5String

END

And we also need to create a specification that defines FamilyRecord:


Family DEFINITIONS AUTOMATIC TAGS ::= BEGIN

EXPORTS;

IMPORTS Name, ChildInformation FROM Common;

FamilyRecord ::= SET {
nameOfSpouse Name,
ageOfSpouse INTEGER (18..MAX),
children SEQUENCE OF ChildInformation
}

END

So we have a module named Common that defines ChildInformation and Name. And we have two other modules, named Employee (which defines the PersonnelRecord PDU) and Family (which defines the FamilyRecord PDU) that both make use of these two definitions in the Common module.

Now, suppose we have a need to write some C code that decodes a PersonnelRecord and uses the name of the employee’s spouse and the information about the employee’s children in a new encoding of a FamilyRecord. Below is a complete C program that can accomplish this. The sections that I’m going to talk about in a little more detail are indicated with numbers in square brackets, e.g., [1], [2], [3], etc.


/*
This program does the following:
Reads and decodes an already-encoded PersonnelRecord message.
Uses pieces of that decoded PersonnelRecord to populate the structures for a new FamilyRecord message.
Encodes that FamilyRecord message.
*/

#include "Employee.h"
#include "Family.h"
#include "rtxsrc/rtxDiag.h"
#include "rtxsrc/rtxFile.h"

#define MAXMSGLEN (1024)

/*
 * Decodes a PersonnelRecord from EmployeeMessage.dat, re-uses the spouse name
 * and the children list from the decoded record to populate a FamilyRecord,
 * encodes that FamilyRecord, and writes the encoding to FamilyMessage.dat.
 *
 * Returns 0 on success and a non-zero value on any failure.  Every context
 * that was successfully initialized is freed on all exit paths.
 */
int main(void)
{
   PersonnelRecord tEmployee;
   FamilyRecord tFamily;
   OSCTXT tDecodeContext, tEncodeContext;

   /* Receives the encoded (i.e., not yet decoded) PersonnelRecord message
      read from the input file (szInputFileName). */
   OSOCTET* pachEmployeeMessage;

   /* Receives the encoded FamilyRecord message from this program's encode call */
   OSOCTET achFamilyMessage[MAXMSGLEN];

   /* Receives a pointer to the encoded FamilyRecord message in order to print it and then write it to a file */
   OSOCTET *pachFamilyMessage;

   /* Length of the encoded PersonnelRecord read from the input file. */
   OSSIZE iLength;

   /* Result of the encode call: the encoded length when positive, otherwise
      treated as a failure.  Kept in a signed int because storing it in an
      OSSIZE (presumably an unsigned type -- confirm against the run-time
      headers) would make a negative error status pass the "> 0" check. */
   int iEncodedLength;

   int iStatus;
   int iResult = -1;
   FILE* ptOutputFile;
   const char szInputFileName[] = "EmployeeMessage.dat";
   const char szOutputFileName[] = "FamilyMessage.dat";
   OSBOOL bTrace = TRUE, bVerbose = FALSE;

   /* Initialize the context structure for the decoding. */
   if (rtInitContext (&tDecodeContext) != 0) { /* [1] */
      printf ("Error initializing decode context\n");
      return -1;
   }
   rtxSetDiag (&tDecodeContext, bVerbose);

   /* Read the input file into a memory buffer. */
   iStatus = rtxFileReadBinary (&tDecodeContext, szInputFileName, &pachEmployeeMessage, &iLength); /* [2] */
   if (0 != iStatus) {
      printf ("Error opening %s for read access\n", szInputFileName);
      goto freeDecodeContext;
   }
   iStatus = xd_setp64 (&tDecodeContext, pachEmployeeMessage, iLength, 0, 0, 0);
   if (0 != iStatus) {
      rtxErrPrint (&tDecodeContext);
      iResult = iStatus;
      goto freeDecodeContext;
   }

   /* Clear the structures that will receive the decoded message. */
   asn1Init_PersonnelRecord (&tEmployee);

   /* Decode the PersonnelRecord message. */
   iStatus = asn1D_PersonnelRecord (&tDecodeContext, &tEmployee, ASN1EXPL, 0); /* [3] */
   if (0 != iStatus) {
      printf ("decode of PersonnelRecord failed\n");
      rtxErrPrint (&tDecodeContext);
      goto freeDecodeContext;
   }
   if (bTrace) {
      printf ("Decode of PersonnelRecord was successful\n");
      printf ("Decoded record:\n");
      asn1Print_PersonnelRecord ("Employee", &tEmployee);
   }

   /* Now use the spouse's name and the children's names in a new FamilyRecord message. */

   /* Initialize the context structure for the encoding. */
   iStatus = rtInitContext (&tEncodeContext); /* [4] */
   if (0 != iStatus) {
      printf ("encoding context initialization failed\n");
      rtxErrPrint (&tEncodeContext);
      iResult = iStatus;
      goto freeDecodeContext;
   }
   rtxSetDiag (&tEncodeContext, bVerbose);

   /* Populate the structures for the FamilyRecord message. */ /* [5] */
   /* These are shallow structure copies: tFamily ends up sharing whatever
      pointers tEmployee holds, so the decode context must not be freed
      until the encoding below is finished. */
   tFamily.nameOfSpouse = tEmployee.nameOfSpouse;
   tFamily.ageOfSpouse = 30;
   tFamily.children = tEmployee.children;

   /* Encode the FamilyRecord message. */
   xe_setp (&tEncodeContext, achFamilyMessage, sizeof(achFamilyMessage));
   iEncodedLength = asn1E_FamilyRecord (&tEncodeContext, &tFamily, ASN1EXPL); /* [6] */
   if (iEncodedLength <= 0) {
      rtxErrPrint (&tEncodeContext);
      /* Never report success (0) when nothing was encoded. */
      iResult = (iEncodedLength < 0) ? iEncodedLength : -1;
      goto freeEncodeContext;
   }

   pachFamilyMessage = xe_getp (&tEncodeContext);
   if (bTrace) {
      if (XU_DUMP (pachFamilyMessage) != 0) {
         printf ("dump of ASN.1 message failed.");
      }
   }

   /* Write the encoded message out to the output file */ /* [7] */
   ptOutputFile = fopen (szOutputFileName, "wb");
   if (0 == ptOutputFile) {
      printf ("Error opening %s for write access\n", szOutputFileName);
      goto freeEncodeContext;
   }
   if (fwrite (pachFamilyMessage, 1, (size_t)iEncodedLength, ptOutputFile)
       != (size_t)iEncodedLength) {
      printf ("Error writing to %s\n", szOutputFileName);
      fclose (ptOutputFile);
      goto freeEncodeContext;
   }
   /* fclose flushes buffered data, so a failure here means a bad output file. */
   if (fclose (ptOutputFile) != 0) {
      printf ("Error writing to %s\n", szOutputFileName);
      goto freeEncodeContext;
   }

   iResult = 0;

   /* Now free up our contexts. */ /* [8] */
freeEncodeContext:
   rtFreeContext (&tEncodeContext);
freeDecodeContext:
   rtFreeContext (&tDecodeContext);

   return iResult;
}

In part [1] we’re initializing a context structure for decoding.

In part [2] we’re reading a file that contains an encoded PersonnelRecord. The byte array pachEmployeeMessage will have the bytes of the encoded message.

In part [3] we’re decoding the PersonnelRecord into the tEmployee structure.

In part [4] we’re initializing a context structure for encoding. Note that we’re using different context structures for decoding and encoding.

Part [5] is the crucial part. Here we’re populating the members of the tFamily structure before we use it to encode a FamilyRecord message. For two of those members we’re using members of the tEmployee structure, which contains the decoded information from the PersonnelRecord message. In both cases the members are structures in the generated C code, so the assignment results in a shallow copy of the structure from tEmployee to tFamily. So all pointers within the tEmployee structures stay the same in the tFamily structures. The crucial part to remember here is that the memory used for the decoding of the PersonnelRecord message (i.e., the tEmployee structure) must remain intact until we’re completely done with the tFamily structure, since the tFamily structure now has pointers to that memory.

In part [6] we’re encoding a FamilyRecord message using the tFamily structure that we just populated in part [5].

In part [7] we’re writing the encoded FamilyRecord message out to a file.

In part [8] we’re freeing the two contexts that we used, one for decoding and one for encoding. As pointed out in part [5] it’s crucial that the context, and hence the memory, used for the decoding remain intact until we’re completely done with the encoding, since the structure used for the encoding has pointers to the memory used for the decoding.

No Comments

Compact code generation in ASN1C

ASN.1 is used in a lot of different areas, and a new one is the Internet of Things (IoT).  In particular, Narrowband IoT (NB-IoT) uses ASN.1 UPER-based messaging.

One characteristic of these devices is they are small, so code size is critical.  We have been working on ways to make our ASN1C generated code and run-time libraries as compact as possible for applications such as these.   In our latest ASN1C v7.2.1 patch release, we are now including a new set of compact libraries for Linux.  These can be found in the c/lib_compact directories.  They are built with gcc using maximum space optimization settings and with a lot of non-critical code stripped out.  The compact libraries are roughly 25% smaller than the standard libraries.

In addition to using the compact libraries, additional steps can be taken to reduce the size of the generated code.  We touched on some of these in a past blog post entitled “Optimizing PER Encoding and Code Footprint”.  We would also recommend using the following command-line options (the equivalent GUI option is in parentheses):

  • -compact  (Generate compact code)
  • -noinit  (uncheck the Generate Initialization Functions checkbox)
  • -noenumconvert (do not generate enum-to-string conversion functions – should only be enabled if print functions are generated)

Other options that you may or may not be able to use:

  • -lax (Do not generate constraint checks)
  • -strict-size (Interpret size constraints strictly)

If all of these measures are employed, users could potentially see the size of their application reduced by one half or more.

 

No Comments

ASN1C 7.2 Improved Comment Handling

In version 7.2, we improved our handling of ASN.1 comments, as follows.

  • When using the “Pretty-print ASN.1” (-asn1)  option, comments from type assignments and elements (SEQUENCE/SET/CHOICE components) are now included in the output.  Previously, pretty-printed ASN.1 did not include any ASN.1 comments in the output.
  • When generating C/C++ code, we previously put ASN.1 comments only for types into the C/C++ comments.  We now include ASN.1 comments from elements as well.
  • When writing comments, we now try to preserve the position of the comment as it appeared in the ASN.1.  We formerly printed all comments before the type assignment with which we associated the comment, even if the comment actually appeared after the type assignment.

When we output ASN.1 comments and ASN.1 syntax, whether the context is pretty-printed ASN.1 or C/C++ comments, we are not simply writing out everything as it appeared in the input.  This means we have to associate comments with syntax.  Since ASN.1 comments don’t have a syntactic relationship to other parts of the ASN.1 syntax, such associations involve a heuristic.  In the example below, the comment is potentially associated with either BigNumber or SmallNumber, though common practice suggests it’s most likely related to SmallNumber.

BigNumber ::= INTEGER (500..1000)

-- This is the type to use for speeds

SmallNumber ::= INTEGER (0..30)

Here’s a rough description of the heuristic rules we use:

  • If the start of a comment comes after some other ASN.1 syntax appearing on the same line, the comment is considered related to that syntax.
  • If the start of a comment is preceded, on the same line, only by whitespace, the comment may be related either to syntax that precedes or succeeds the comment.
    • If the comment is followed by a type assignment or an element, the first such comment that is not indented, relative to the type assignment or element, is associated with that type assignment or element.  Successive comments are also associated with the same item, regardless of indentation.
    • Any comments that preceded the first non-indented comment (all of which are indented) are associated with something which precedes those comments.  If these comments are immediately preceded by a type assignment or an element, they are associated with that type assignment or element.  In any case, they will not be associated with an element or type assignment that follows those comments.

Some examples:

-- comment for Person
Person ::= SEQUENCE {
   -- comment for age
   age INTEGER, -- another comment for age
      -- yet another comment for age
   -- comment for name
   name UTF8String
} -- another comment for Person
   -- yet another comment for Person

-- comment for Winnings
Winnings::= INTEGER (500..1000)

It is possible that these heuristics will associate a comment differently than a human reader would have.  Consider this example:

BigNumber ::= INTEGER (500..1000)

   -- SmallNumber is used for speeds

SmallNumber ::= INTEGER (0..30)

Because of the indentation, the comment will be associated with BigNumber, but it obviously actually relates to SmallNumber.  Since we try to preserve location when printing, we’ll print the comment after the definition of BigNumber, which can give the reader a hint that the comment might actually relate to something else (if the content of the comment were different, this might not be so obvious to the reader).

No Comments

Performance Improvements in ASN1C v7.2

One of the new features announced in the release of ASN1C v7.2 was improved C/C++ PER encode and decode performance.  This blog post provides some details on the improvements.

We measured a number of different message types, but two that stood out were the improvements in encoding and decoding unaligned PER messages for LTE RRC and DSRC – two commonly used specifications.   For LTE, we encoded a sample of DL-DCCH-Message messages.  For DSRC, a set of BasicSafetyMessage messages was used.

The time in milliseconds to decode and encode 100000 records between v71x and v72x is shown in the following charts:

The chart on the left shows decoding and the chart on the right shows encoding.  As can be seen, decoding performance for LTE RRC improved by over 100% while for DSRC, the gain was approximately 50%.  Encoding performance gains were not as dramatic, with improvements of 28% and 18% respectively.

So to what can we attribute these gains?  The main improvement came from moving calculations that were previously done at run-time to compile time, thus generating simpler code for things such as constraint checking.  Similar improvements were made in the generation of code specifically targeted at “unaligned PER” (or UPER as it is commonly known) through the use of the -uper command-line option.  This resulted in the removal of code that checked for aligned or unaligned PER at run-time.

Finally, we introduced a new kind of simplified memory management called static memory blocks that contains some limitations on use but which is much faster because rather than having to carve up blocks, it simply always sequentially allocates going forward.  Further details on this are available at the following URL:

https://www.obj-sys.com/docs/acv72/CCppHTML/ch08s04.html

 

No Comments

Adding Information to the J2735 Specification

As some background, see the following article:

http://dsrc-tools.com/map-spat/index.php/knowledge-base/regional-data-extensions-generic-lane-object/

This article discusses how to add some information to the J2735 specification.

This specific blog post discusses how to add information for a new region to the J2735 specification, generate code for it using ASN1C, and then reference the new material in your own code. In this example we will add a GenericLane definition for a new region named Mars. That’s Mars as in the planet, not the town in western PA.

To start, the J2735 ASN.1 specification defines a type called a RegionId as follows:

RegionId ::= INTEGER (0..255)
   noRegion     RegionId ::= 0  -- Use default supplied stubs
   addGrpA      RegionId ::= 1  -- USA
   addGrpB      RegionId ::= 2  -- Japan
   addGrpC      RegionId ::= 3  -- EU
   -- NOTE: new registered regional IDs will be added here
   -- The values 128 and above are for local region use

This definition is in the DSRC module in the ASN.1 file. If you read the background material (you did, didn’t you?), you know that the DSRC module is not to be modified by users of the J2735 specification.

But the background material also indicates that there is a module named REGION in the ASN.1 file that can be modified by developers. So let’s make the first line of this module look like this:

marsRegion RegionId ::= 222

The background material says to let SAE know what number you’ve chosen so it can be tracked. Since there is no intention that I know of to define intelligent traffic systems on the planet Mars, this number 222 isn’t actually being reported to SAE and remains available for anyone else to use.

Now a few lines down let’s modify the Reg-GenericLane definition so it looks like this:

Reg-GenericLane           DSRC.REG-EXT-ID-AND-TYPE ::= {
   { Mars.GenericLane-mars IDENTIFIED BY marsRegion },
   ...  }

Then at the end of the ASN.1 file let’s add a module named Mars that looks like this:

-- ^-^-^-^-^-^-^-^-^-^-^-^-^-^-^-^-^-^-^-^-^-^-^-^-^-^-^-^-
-- 
-- Begin module: Mars
-- 
-- ^-^-^-^-^-^-^-^-^-^-^-^-^-^-^-^-^-^-^-^-^-^-^-^-^-^-^-^-
 
Mars DEFINITIONS AUTOMATIC TAGS::= BEGIN

GenericLane-mars ::= SEQUENCE {
   laneType LaneType,
   distanceFromViking1 INTEGER, -- Since this is a distance, it could be constrained to be positive
   hue RedHue,
   ...
}

LaneType ::= ENUMERATED {
   martian-lane (1),
   rover-lane (2),
   ...
}

RedHue ::= ENUMERATED {
   light-red (1),
   deep-red (2),
   ...
}

END

That’s it as far as the ASN.1 changes are concerned. Now let’s generate some C++ code using ASN1C with a command like this, assuming our changed ASN.1 definitions are in a file named J2735Mars.asn:

asn1c J2735Mars.asn -uper -cpp -table-unions -genwriter -genreader -genprint -pdu GenericLane -gentest

Let’s look at what those qualifiers do:

  • -uper: Specifies that we’re using unaligned PER (Packed Encoding Rules).
  • -table-unions: Specifies that the code generation should take into account inter-dependencies within the ASN.1 definitions. These inter-dependencies are known as table constraints.
  • -genwriter: Specifies that we want to generate a writer program (writer.cpp) that will encode an instance of our message.
  • -genreader: Specifies that we want to generate a reader program (reader.cpp) that will decode an instance of our message.
  • -genprint: Specifies that we want to generate methods to print the contents of the message.
  • -pdu GenericLane: Specifies that our writer and reader programs are to work with an instance of GenericLane (as opposed to any of the other definitions in the ASN.1 file).
  • -gentest: Specifies that the writer is to generate random test data to populate our GenericLane instance.

You can also add -genmake to generate a makefile that will build the writer and reader binaries.

If you generate the code and then build it, you will have a writer binary that will encode the message into a file named message.dat, and you will have a reader binary that will decode the message that’s in the message.dat file. For my specific situation my message looked like this:

GenericLane {
   laneID = 252
   name = 'AMXrH7pmSZHULHTJ:Us_gUAqMrI'
   ingressApproach = 1
   egressApproach = 3
   laneAttributes {
      directionalUse = { 2, 01xxxxxx }
      sharedWith = { 10, 0x72 01xxxxxx }
      laneType {
         sidewalk = { 16, 0x48 0x61 }
      }
      regional {
         regionId = 254
         regExtValue = 0x04083677ccd172888708
      }
   }
   maneuvers = { 12, 0x5E 0100xxxx }
   nodeList {
      computed {
         referenceLaneId = 181
         offsetXaxis {
            large = -9096
         }
         offsetYaxis {
            small = 253
         }
         rotateXY = 18514
         scaleXaxis = 1124
         scaleYaxis = -1034
         regional[0] {
            regionId = 119
            regExtValue = 0x040831403e5e9dfbcb99
         }
      }
   }
   connectsTo[0] {
      connectingLane {
         lane = 189
         maneuver = { 12, 0x53 0100xxxx }
      }
      remoteIntersection {
         region = 16975
         id = 25828
      }
      signalGroup = 245
      userClass = 0
      connectionID = 70
   }
   overlays[0] = 9
   regional[0] {
      regionId = 222
      regExtValue {
         GenericLane-mars {
            laneType = martian-lane
            distanceFromViking1 = 3814
            hue = light-red
         }
      }
   }
}

Keep in mind that this data is randomly generated test data, and it will be different with each code generation. Note the “regional” section at the end, which is where we added our information about lanes on Mars. The test data generator chose our Mars definition, probably because it’s the only one there. But you can see values for the fields we added for traffic lanes on Mars.

The writer.cpp file calls the genTestInstance() method of the GenericLanePDU object, which is an instance of the class ASN1C_GenericLane. The code for this genTestInstance() method is in the generated file DSRCTest.cpp. The end of the method is where the new items are given values.

No Comments