#!/usr/bin/env python3
# Copyright(c) 2022 Meta
# SPDX-License-Identifier: Apache-2.0

"""Format HTTP Status codes for use in a C header

This script extracts HTTP status codes from mozilla.org
and formats them to fit inside of a C enum along with
comments.

The output may appear somewhat redundant but it strives to
a) be human readable,
b) eliminate the need to look up status manually,
c) be machine parseable for table generation.

The output is sorted for convenience.

Usage:
    ./scripts/net/enumerate_http_status.py
    HTTP_100_CONTINUE = 100, /**< Continue */
    ...
    HTTP_418_IM_A_TEAPOT = 418, /**< I'm a teapot */
    ...
    HTTP_511_NETWORK_AUTHENTICATION_REQUIRED = 511, /**< Network Authentication Required */
"""

from html.parser import HTMLParser
import re
import sys

# Page that lists every status code inside <code> elements.
STATUS_URL = 'https://developer.mozilla.org/en-US/docs/Web/HTTP/Status'

# Seconds to wait for the server; without a timeout requests.get() may block
# indefinitely.
REQUEST_TIMEOUT_S = 30

# "<3 digits> <description starting with a letter>", e.g. "404 Not Found".
_STATUS_RE = re.compile(r'([0-9]{3}) ([a-zA-Z].*)')

# Turns an upper-cased description into the tail of a C identifier:
# spaces and hyphens become underscores, apostrophes are dropped.
_LABEL_TRANSLATION = str.maketrans({' ': '_', '-': '_', "'": None})


class HTTPStatusParser(HTMLParser):
    """Collect HTTP status codes found inside <code> elements.

    After feeding HTML to the parser, ``status_codes`` maps each integer
    status code to its description text (e.g. ``{404: 'Not Found'}``).
    """

    def __init__(self):
        super().__init__()
        self.status_codes = {}
        self.in_code_tag = False
        self.current_data = ""

    def handle_starttag(self, tag, attrs):
        """Start accumulating text when a <code> element opens."""
        if tag == 'code':
            self.in_code_tag = True
            self.current_data = ""

    def handle_endtag(self, tag):
        """Record the accumulated text if it looks like "<code> <text>"."""
        if tag != 'code' or not self.in_code_tag:
            return
        self.in_code_tag = False
        # Text that is empty or not of the form "NNN Description" (e.g. a
        # header name rendered in <code>) simply fails to match.
        match = _STATUS_RE.match(self.current_data.strip())
        if match:
            self.status_codes[int(match.group(1))] = match.group(2)

    def handle_data(self, data):
        """Append text content while inside a <code> element."""
        if self.in_code_tag:
            self.current_data += data


def parse_status_codes(html):
    """Return a dict mapping status code (int) to description found in *html*."""
    parser = HTTPStatusParser()
    parser.feed(html)
    # Flush any text the parser is still buffering at end of input.
    parser.close()
    return parser.status_codes


def format_enum_entry(code, description):
    """Return one C enum line for *code*.

    Example: ``format_enum_entry(418, "I'm a teapot")`` returns
    ``"HTTP_418_IM_A_TEAPOT = 418, /**< I'm a teapot */"``.
    """
    tail = description.upper().translate(_LABEL_TRANSLATION)
    label = '_'.join(['HTTP', f'{code}', tail])
    return f'{label} = {code}, /**< {description} */'


def format_enum_entries(status_codes):
    """Return the enum lines for *status_codes*, sorted by numeric code."""
    return [format_enum_entry(code, status_codes[code])
            for code in sorted(status_codes)]


def fetch_status_page(url=STATUS_URL, timeout=REQUEST_TIMEOUT_S):
    """Download *url* and return the response body as text.

    Raises:
        requests.RequestException: on connection failure, timeout, or an
            HTTP error status.
    """
    # Imported here so the parsing/formatting helpers above can be used
    # without the third-party dependency or network access.
    import requests

    response = requests.get(url, timeout=timeout)
    # Fail loudly instead of parsing an error page into empty output.
    response.raise_for_status()
    return response.text


def main():
    """Fetch the status list and print one C enum entry per status code."""
    status_codes = parse_status_codes(fetch_status_page())
    if not status_codes:
        # Most likely the page layout changed; empty output would otherwise
        # go unnoticed when redirected into a header.
        sys.exit(f'error: no HTTP status codes found at {STATUS_URL}')
    for line in format_enum_entries(status_codes):
        print(line)


if __name__ == '__main__':
    main()