1#!/usr/bin/env python3
2# Copyright(c) 2022 Meta
3# SPDX-License-Identifier: Apache-2.0
4
5"""Format HTTP Status codes for use in a C header
6
7This script extracts HTTP status codes from mozilla.org
8and formats them to fit inside of a C enum along with
9comments.
10
The output may appear somewhat redundant, but it strives to
a) be human readable,
b) eliminate the need to look up status codes manually, and
c) be machine parseable for table generation.
16
17The output is sorted for convenience.
18
Usage:
    ./scripts/net/enumerate_http_status.py
    HTTP_100_CONTINUE = 100, /**< Continue */
    ...
    HTTP_418_IM_A_TEAPOT = 418, /**< I'm a teapot */
    ...
    HTTP_511_NETWORK_AUTHENTICATION_REQUIRED = 511, /**< Network Authentication Required */
26"""
27
28from html.parser import HTMLParser
29import requests
30import re
31
class HTTPStatusParser(HTMLParser):
    """Collect HTTP status codes found in the ``<code>`` elements of a page.

    Text inside each ``<code>`` element is accumulated and, when the element
    closes, matched against ``<three digits> <reason phrase>``. Every match is
    stored in ``status_codes``, keyed by the integer code with the reason
    phrase as the value; a later match for the same code replaces an earlier
    one. Text outside ``<code>`` elements is ignored.
    """

    # Three-digit code, a single space, then a phrase that starts with an
    # ASCII letter. Compiled once instead of on every closing tag.
    _STATUS_RE = re.compile(r'([0-9]{3}) ([a-zA-Z].*)')

    def __init__(self):
        """Create a parser with no status codes collected yet."""
        super().__init__()
        # Maps integer status code -> reason phrase.
        self.status_codes = {}
        # True while the parser is between <code> and </code>.
        self.in_code_tag = False
        # Text gathered so far for the <code> element being read.
        self.current_data = ""

    def handle_starttag(self, tag, attrs):
        """Start collecting text when a ``<code>`` element opens."""
        if tag == 'code':
            self.in_code_tag = True
            self.current_data = ""

    def handle_endtag(self, tag):
        """Record the collected text as a status code when ``</code>`` closes."""
        if tag != 'code' or not self.in_code_tag:
            return
        self.in_code_tag = False

        # HTML renders any run of whitespace (newlines, tabs, repeated or
        # non-breaking spaces) as one space. Collapse it the same way so a
        # phrase wrapped across source lines is neither truncated at the
        # newline nor rejected by the single-space pattern.
        text = ' '.join(self.current_data.split())
        match = self._STATUS_RE.match(text)
        if match:
            self.status_codes[int(match.group(1))] = match.group(2)

    def handle_data(self, data):
        """Append text to the current buffer while inside ``<code>``."""
        if self.in_code_tag:
            self.current_data += data
57
# Fetch the MDN overview page that lists the HTTP status codes. The timeout
# keeps the script from hanging forever on a stalled connection, and
# raise_for_status() aborts on an HTTP error response instead of silently
# parsing an error page.
page = requests.get('https://developer.mozilla.org/en-US/docs/Web/HTTP/Status',
                    timeout=30)
page.raise_for_status()

parser = HTTPStatusParser()
parser.feed(page.text)

# Print one C enumerator per status code, in ascending numeric order.
for key in sorted(parser.status_codes):
    val = parser.status_codes[key]
    enum_head = 'HTTP'
    enum_body = f'{key}'
    # Turn the reason phrase into an identifier fragment: upper-case it,
    # replace spaces and hyphens with underscores, and drop apostrophes.
    enum_tail = val.upper().replace(' ', '_').replace("'", '').replace('-', '_')
    enum_label = '_'.join([enum_head, enum_body, enum_tail])
    comment = f'/**< {val} */'

    print(f'{enum_label} = {key}, {comment}')
72