1 // Copyright 2015 The Chromium Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef BSSL_DER_PARSER_H_
16 #define BSSL_DER_PARSER_H_
17 
18 #include <stdint.h>
19 
20 #include <optional>
21 
22 #include <openssl/base.h>
23 #include <openssl/bytestring.h>
24 
25 #include "input.h"
26 
27 BSSL_NAMESPACE_BEGIN
28 namespace der {
29 
// Forward declarations. This header only names these types in Parser member
// function signatures (ReadBitString() and ReadGeneralizedTime()), so their
// full definitions are not needed here.
class BitString;
struct GeneralizedTime;
32 
33 // Parses a DER-encoded ASN.1 structure. DER (distinguished encoding rules)
34 // encodes each data value with a tag, length, and value (TLV). The tag
35 // indicates the type of the ASN.1 value. Depending on the type of the value,
36 // it could contain arbitrary bytes, so the length of the value is encoded
37 // after the tag and before the value to indicate how many bytes of value
38 // follow. DER also defines how the values are encoded for particular types.
39 //
40 // This Parser places a few restrictions on the DER encoding it can parse. The
41 // largest restriction is that it only supports tags which have a tag number
42 // no greater than 30 - these are the tags that fit in a single octet. The
43 // second restriction is that the maximum length for a value that can be parsed
44 // is 4GB. Both of these restrictions should be fine for any reasonable input.
45 //
46 // The Parser class is mainly focused on parsing the TLV structure of DER
47 // encoding, and does not directly handle parsing primitive values (other
48 // functions in the bssl::der namespace are provided for this.) When a Parser
49 // is created, it is passed in a reference to the encoded data. Because the
50 // encoded data is not owned by the Parser, the data cannot change during the
51 // lifespan of the Parser. The Parser functions by keeping a pointer to the
52 // current TLV which starts at the beginning of the input and advancing through
53 // the input as each TLV is read. As such, a Parser instance is thread-unsafe.
54 //
55 // Most methods for using the Parser write the current tag and/or value to
56 // the output parameters provided and then advance the input to the next TLV.
57 // None of the methods explicitly expose the length because it is part of the
58 // value. All methods return a boolean indicating whether there was a parsing
59 // error with the current TLV.
60 //
61 // Some methods are provided in the Parser class as convenience to both read
62 // the current TLV from the input and also parse the DER encoded value,
63 // converting it to a corresponding C++ type. These methods simply combine
64 // ReadTag() with the appropriate ParseType() free function.
65 //
66 // The design of DER encoding allows for nested data structures with
67 // constructed values, where the value is a series of TLVs. The Parser class
68 // is not designed to traverse through a nested encoding from a single object,
69 // but it does facilitate parsing nested data structures through the
70 // convenience methods ReadSequence() and the more general ReadConstructed(),
71 // which provide the user with another Parser object to traverse the next
72 // level of TLVs.
73 //
74 // For a brief example of how to use the Parser, suppose we have the following
75 // ASN.1 type definition:
76 //
77 //   Foo ::= SEQUENCE {
78 //     bar OCTET STRING OPTIONAL,
79 //     quux OCTET STRING }
80 //
81 // If we have a DER-encoded Foo in an Input |encoded_value|, the
82 // following code shows an example of how to parse the quux field from the
83 // encoded data.
84 //
85 //   bool ReadQuux(Input encoded_value, Input* quux_out) {
86 //     Parser parser(encoded_value);
87 //     Parser foo_parser;
88 //     if (!parser.ReadSequence(&foo_parser))
89 //       return false;
90 //     if (!foo_parser->SkipOptionalTag(kOctetString))
91 //       return false;
92 //     if (!foo_parser->ReadTag(kOctetString, quux_out))
93 //       return false;
94 //     return true;
95 //   }
96 class OPENSSL_EXPORT Parser {
97  public:
98   // Default constructor; equivalent to calling Parser(Input()). This only
99   // exists so that a Parser can be stack allocated and passed in to
100   // ReadConstructed() and similar methods.
101   Parser();
102 
103   // Creates a parser to parse over the data represented by input. This class
104   // assumes that the underlying data will not change over the lifetime of
105   // the Parser object.
106   explicit Parser(Input input);
107 
108   Parser(const Parser &) = default;
109   Parser &operator=(const Parser &) = default;
110 
111   // Returns whether there is any more data left in the input to parse. This
112   // does not guarantee that the data is parseable.
113   bool HasMore();
114 
115   // Reads the current TLV from the input and advances. If the tag or length
116   // encoding for the current value is invalid, this method returns false and
117   // does not advance the input. Otherwise, it returns true, putting the
118   // read tag in |tag| and the value in |out|.
119   [[nodiscard]] bool ReadTagAndValue(CBS_ASN1_TAG *tag, Input *out);
120 
121   // Reads the current TLV from the input and advances. Unlike ReadTagAndValue
122   // where only the value is put in |out|, this puts the raw bytes from the
123   // tag, length, and value in |out|.
124   [[nodiscard]] bool ReadRawTLV(Input *out);
125 
126   // Basic methods for reading or skipping the current TLV, with an
127   // expectation of what the current tag should be. It should be possible
128   // to parse any structure with these 4 methods; convenience methods are also
129   // provided to make some cases easier.
130 
131   // If the current tag in the input is |tag|, it puts the corresponding value
132   // in |out| and advances the input to the next TLV. If the current tag is
133   // something else, then |out| is set to nullopt and the input is not
134   // advanced. Like ReadTagAndValue, it returns false if the encoding is
135   // invalid and does not advance the input.
136   [[nodiscard]] bool ReadOptionalTag(CBS_ASN1_TAG tag, std::optional<Input> *out);
137 
138   // If the current tag in the input is |tag|, it puts the corresponding value
139   // in |out|, sets |was_present| to true, and advances the input to the next
140   // TLV. If the current tag is something else, then |was_present| is set to
141   // false and the input is not advanced. Like ReadTagAndValue, it returns
142   // false if the encoding is invalid and does not advance the input.
143   // DEPRECATED: use the std::optional version above in new code.
144   // TODO(mattm): convert the existing callers and remove this override.
145   [[nodiscard]] bool ReadOptionalTag(CBS_ASN1_TAG tag, Input *out, bool *was_present);
146 
147   // Like ReadOptionalTag, but the value is discarded.
148   [[nodiscard]] bool SkipOptionalTag(CBS_ASN1_TAG tag, bool *was_present);
149 
150   // If the current tag matches |tag|, it puts the current value in |out|,
151   // advances the input, and returns true. Otherwise, it returns false.
152   [[nodiscard]] bool ReadTag(CBS_ASN1_TAG tag, Input *out);
153 
154   // Advances the input and returns true if the current tag matches |tag|;
155   // otherwise it returns false.
156   [[nodiscard]] bool SkipTag(CBS_ASN1_TAG tag);
157 
158   // Convenience methods to combine parsing the TLV with parsing the DER
159   // encoding for a specific type.
160 
161   // Reads the current TLV from the input, checks that the tag matches |tag|
162   // and is a constructed tag, and creates a new Parser from the value.
163   [[nodiscard]] bool ReadConstructed(CBS_ASN1_TAG tag, Parser *out);
164 
165   // A more specific form of ReadConstructed that expects the current tag
166   // to be 0x30 (SEQUENCE).
167   [[nodiscard]] bool ReadSequence(Parser *out);
168 
169   // Expects the current tag to be kInteger, and calls ParseUint8 on the
170   // current value. Note that DER-encoded integers are arbitrary precision,
171   // so this method will fail for valid input that represents an integer
172   // outside the range of an uint8_t.
173   //
174   // Note that on failure the Parser is left in an undefined state (the
175   // input may or may not have been advanced).
176   [[nodiscard]] bool ReadUint8(uint8_t *out);
177 
178   // Expects the current tag to be kInteger, and calls ParseUint64 on the
179   // current value. Note that DER-encoded integers are arbitrary precision,
180   // so this method will fail for valid input that represents an integer
181   // outside the range of an uint64_t.
182   //
183   // Note that on failure the Parser is left in an undefined state (the
184   // input may or may not have been advanced).
185   [[nodiscard]] bool ReadUint64(uint64_t *out);
186 
187   // Reads a BIT STRING. On success returns BitString. On failure, returns
188   // std::nullopt.
189   //
190   // Note that on failure the Parser is left in an undefined state (the
191   // input may or may not have been advanced).
192   [[nodiscard]] std::optional<BitString> ReadBitString();
193 
194   // Reads a GeneralizeTime. On success fills |out| and returns true.
195   //
196   // Note that on failure the Parser is left in an undefined state (the
197   // input may or may not have been advanced).
198   [[nodiscard]] bool ReadGeneralizedTime(GeneralizedTime *out);
199 
200   // Lower level methods. The previous methods couple reading data from the
201   // input with advancing the Parser's internal pointer to the next TLV; these
202   // lower level methods decouple those two steps into methods that read from
203   // the current TLV and a method that advances the internal pointer to the
204   // next TLV.
205 
206   // Reads the current TLV from the input, putting the tag in |tag| and the raw
207   // value in |out|, but does not advance the input. Returns true if the tag
208   // and length are successfully read and the output exists.
209   [[nodiscard]] bool PeekTagAndValue(CBS_ASN1_TAG *tag, Input *out);
210 
211   // Advances the input to the next TLV. This method only needs to be called
212   // after PeekTagAndValue; all other methods will advance the input if they
213   // read something.
214   bool Advance();
215 
216  private:
217   CBS cbs_;
218   size_t advance_len_ = 0;
219 };
220 
221 }  // namespace der
222 BSSL_NAMESPACE_END
223 
224 #endif  // BSSL_DER_PARSER_H_
225