From 64acf0921483329410b43d0af81c7c8a6abbe68f Mon Sep 17 00:00:00 2001 From: Alexander Date: Mon, 16 Jun 2025 19:27:42 -0400 Subject: [PATCH] correctly handle unknown syntaxes when highlighting --- acl.cool/site/OVERVIEW_GP1.html | 84 ++++++++++++++++----------------- acl.cool/syntax_wrapper.sh | 1 + build.sh | 19 +------- soupault.toml | 2 +- syntax_wrapper.sh | 9 ++++ ytheleus.org/syntax_wrapper.sh | 1 + 6 files changed, 56 insertions(+), 60 deletions(-) create mode 120000 acl.cool/syntax_wrapper.sh create mode 100755 syntax_wrapper.sh create mode 120000 ytheleus.org/syntax_wrapper.sh diff --git a/acl.cool/site/OVERVIEW_GP1.html b/acl.cool/site/OVERVIEW_GP1.html index 5270701..f250ec3 100644 --- a/acl.cool/site/OVERVIEW_GP1.html +++ b/acl.cool/site/OVERVIEW_GP1.html @@ -17,7 +17,7 @@ section.

the character doesn't cause a parsing issue. For example, whitespace tokens are not allowed in variable names.

Some examples of assigning variables:

-
var x: i32;  // x is an uninitialized 32-bit signed integer
+
var x: i32;  // x is an uninitialized 32-bit signed integer
 var y <- x;  // this won't work, because x has no value
 x <- 7;
 var y <- x;  // this time it works, because x is now 7
@@ -25,12 +25,12 @@ var y <- x;  // this time it works, because x is now 7
 con a: f64 <- 99.8;  // a is immutable
 a <- 44.12;          // this doesn't work, because con variables cannot be reassigned

The following lines are equivalent,

-
con a <- f64(7.2);
+
con a <- f64(7.2);
 con a: f64 <- 7.2;
 con a <- 7.2;        // 7.2 is implicitly of type f64
 con a <- 7.2D;       // With an explicit type suffix

as are these.

-
var c: f32 <- 9;
+
var c: f32 <- 9;
 var c <- f32(9);
 var c: f32 <- f32(9);
 var c <- 9F;
@@ -68,7 +68,7 @@ Numeric operators are as one expects from C, with the addition of ** as a power operator.

Numeric literals have an implicit type, or the type can be specified by a case-insensitive suffix. For example:

-
var i1 <- 1234;    // implicitly i32
+
var i1 <- 1234;    // implicitly i32
 var f1 <- 1234.5;  // implicitly f64
 
 var i3 <- 1234L;   // i64
@@ -150,7 +150,7 @@ value can be used as a literal in this fasion.

double-quoted, e.g. "Hello, World.".

Arrays

GP1 supports typical array operations.

-
var tuples : (int, int)[]; // declare array of tuples
+
var tuples : (int, int)[]; // declare array of tuples
 var strings : string[];    // declare array of strings
 
 var array <- i32[n];       // declare and allocate array of n elements
@@ -161,7 +161,7 @@ con nums <- {1, 2, 3};     // immutable array of i32
 

Use the length property to access the number of elements in an allocated array. Attempting to access length of an unallocated array is an exception.

-

+

 var colors <- {"Red", "White", "Blue"};  // allocate array
 
 var count <- colors.length; // count is usize(3)
@@ -170,7 +170,7 @@ var count <- colors.length; // count is usize(3)
 Negative values wrap from the end (-1 is the last element). An exception
 occurs if the value is too big, i.e.no modulo operation is
 performed.

-
var w <- {1, 2, 3, 4, 5, 6, 7};
+
var w <- {1, 2, 3, 4, 5, 6, 7};
 
 w[0]  // first element, 1
 w[-1] // last element, 7
@@ -191,7 +191,7 @@ i.e.(u128(4), "2").1 would be "2".

identical to that of .NET 5 and very similar to that of gawk.

Named Functions

Some examples of defining named functions:

-
fn sum(a: f32, b: f32): f32 { a + b }        // takes parameters and returns an f32
+
fn sum(a: f32, b: f32): f32 { a + b }        // takes parameters and returns an f32
 
 fn twice_println(s: string) {                // takes parameters and implicitly returns ()
     println("${s}\n${s}");
@@ -210,13 +210,13 @@ ordered from left to right in the function definition  that is
 unassigned. With regard to the join_println function
 defined above, this means that all of the following are valid and behave
 identically.

-
join_println(a <- "Hello,", b <- "World.");
+
join_println(a <- "Hello,", b <- "World.");
 join_println(b <- "World.", a <- "Hello,");
 join_println(b <- "World.", "Hello,");
 join_println("Hello,", "World.");

Function names may be overloaded. For example, join_println could be additionally defined as

-
fn join_println(a: string, b: string, sep: string) {    
+
fn join_println(a: string, b: string, sep: string) {    
     println("${a}${sep}${b}");
 }

and then both join_println("Hello,", "World.", " ") and @@ -226,7 +226,7 @@ be familar with this pattern from functional languages like F#, wherein a wrapper function is often used to guard an inner recursive function (GP1 permits both single and mutual recursion in functions). For example:

-
fn factorial(n: u256): u256 {
+
fn factorial(n: u256): u256 {
     fn aux(n: u256, accumulator: u256): u256 {
         match n > 1 {
             true => aux(n - 1, accumulator * n),
@@ -242,7 +242,7 @@ syntax used in this example, refer to Control Flow.

Closures behave as one would expect in GP1, exactly like they do in most other programming languages that feature them. Closures look like this:

-
var x: u32 <- 8;
+
var x: u32 <- 8;
 
 var foo <- { y, z => x * y * z};     // foo is a closure; its type is fn<u32 | u32>
 assert(foo(3, 11) == (8 * 3 * 11));  // true
@@ -268,7 +268,7 @@ sign is enclosed by them.

Lambdas are nearly identical to closures, but they don't close over their environment, and they use the -> symbol in place of =>. A few examples of lambdas:

-
con x: u32 <- 4;  // this line is totally irrelevant
+
con x: u32 <- 4;  // this line is totally irrelevant
 
 con square <- { x -> x * x };                 // this in not valid, because the type of the function is not known
 con square <- { x: u32 -> x * x };            // this if fine, because the type is specified in the lambda
@@ -281,20 +281,20 @@ there is a separate syntax for function types. Given the function
 fn sum(a: f64, b: f64): f64 { a + b } the function type is
 expressed fn<f64 f64 | f64>, meaning a function that
 accepts two f64 values and returns an f64. Therefore,

-
fn sum(a: f64, b: f64): f64 { a + b } 
-
con sum: fn<f64 f64 | f64> <- { a, b -> a + b };
-
con sum <- { a: f64, b: f64 -> a + b };
+
fn sum(a: f64, b: f64): f64 { a + b } 
+
con sum: fn<f64 f64 | f64> <- { a, b -> a + b };
+
con sum <- { a: f64, b: f64 -> a + b };

are all equivalent ways of binding a function of type fn<f64 f64 | f64> to the constant sum. Here's an example of how to express a function type for a function argument.

-
fn apply_op(a: i32, b: i32, op: fn<i32 i32 | i32>): i32 {
+
fn apply_op(a: i32, b: i32, op: fn<i32 i32 | i32>): i32 {
     op(a, b)
 }

Function Type Inference

The above example provides an explicit type for the argument op. You could safely rewrite this as

-
fn apply_op(a: i32, b: i32, op: fn): i32 {
+
fn apply_op(a: i32, b: i32, op: fn): i32 {
     op(a, b)
 }

because the compiler can safely infer the function type of @@ -306,19 +306,19 @@ is not allowed.

syntax used in this section.

Numeric types are automatically coerced into other numeric types as long as that coercion is not lossy. For example,

-
var x: i32 <- 10;
+
var x: i32 <- 10;
 var y: i64 <- x;

is perfectly legal (the 32-bit value fits nicely in the 64-bit variable). However, automatic coercion doesn't work if it would be lossy, so

-
var x: i64 <- 10;
+
var x: i64 <- 10;
 var y: i32 <- x;

doesn't work. This holds for numeric literals as well. Unsurprisingly, var x: i32 <- 3.14 wouldn't compile. The floating point value can't be automatically coerced to an integer type. So what does work? Casting via the target type's pseudo-constructor works.

-
con x: f64 <- 1234.5;        // okay because the literal can represent any floating point type
+
con x: f64 <- 1234.5;        // okay because the literal can represent any floating point type
 con y: f64 <- f16(1234.5);   // also okay, because any f16 can be losslessly coerced to an f64
 con z: i32 <- i32(x);        // also okay; uses the i32 pseudo-constructor to 'cast' x to a 32-bit integer
 
@@ -346,7 +346,7 @@ type of the function is not an integer, GP1 assumes an exit code of
 usize(0) and returns that to the operating system.

The following program prints Hello, World. and exits with an error code.

-
entry main(): usize {
+
entry main(): usize {
     hello_world();
     1
 }
@@ -358,9 +358,9 @@ fn hello_world() {
 keyword that makes it the entry point. The entry function may also be
 implicit. If one is not defined explicitly, the entire file is treated
 as being inside an entry function. Therefore,

-
println("Hello, World.");
+
println("Hello, World.");

is a valid and complete program identical to

-
entry main(): usize {
+
entry main(): usize {
     println("Hello, World.");
 }

This behavior can lend GP1 a very flexible feeling akin to many @@ -368,7 +368,7 @@ scripting languages.

In a program where there is an entry-point specified, only expressions made within that function will be evaluated. This means that the following program does NOT print anything to the console.

-
entry main(): usize {
+
entry main(): usize {
     con x: usize <- 7;
 }
 
@@ -383,7 +383,7 @@ structure, in two variants: match and
 *expr* are expressions and pattern* are
 pattern matching options (refer to Pattern Matching for more
 info).

-
match expr {
+
match expr {
     pattern1 => arm_expr1,
     pattern2 => arm_expr2,
     _ => arm_expr3,
@@ -394,7 +394,7 @@ expression executes all arms that match the pattern. Both flavors return
 their last executed expression.

The when keyword may be used in a given match arm to further restrict the conditions of execution, e.g.

-
con fs <- 43;
+
con fs <- 43;
 
 con is_even <- match fs {
     n when n % 2 == 0 => " is "
@@ -412,10 +412,10 @@ print(fs + is_even + "even.")

along with continue and break to help control program flow. All of these are statements.

-
loop { . . . }  // an unconditional loop -- runs forever or until broken
-
for i in some_iterable { . . . }  // loop over anything that is iterable
-
while some_bool { . . . }  // classic conditional loop that executes until the predicate is false
-
do { . . .
+
loop { . . . }  // an unconditional loop -- runs forever or until broken
+
for i in some_iterable { . . . }  // loop over anything that is iterable
+
while some_bool { . . . }  // classic conditional loop that executes until the predicate is false
+
do { . . .
 } while some_bool  // traditional do/while loop that ensures body executes at least once

Pattern Matching

Pattern matching behaves essentially as it does in SML, with support @@ -423,7 +423,7 @@ for various sorts of destructuring. It works in normal assignment and in match arms. It will eventually work in function parameter assignment, but perhaps not at first.

For now, some examples.

-
a <- ("hello", "world");  // a is a tuple of strings
+
a <- ("hello", "world");  // a is a tuple of strings
 (b, c) <- a;
 
 assert(b == "hello" && c == "world")
@@ -442,24 +442,24 @@ fn u32_list_to_string(l: List<u32>): string {  // this is assuming that sq
 

Enums

Enums are pretty powerful in GP1. They can be the typical enumerated type you'd expect, like

-
enum Coin { penny, nickle, dime, quarter }  // 'vanilla' enum
+
enum Coin { penny, nickle, dime, quarter }  // 'vanilla' enum
 
 var a <- Coin.nickle
 assert a == Coin.nickle
 

Or an enum can have an implicit field named value

-
enum Coin: u16 { penny(1), nickle(5), dime(10), quarter(25) }
+
enum Coin: u16 { penny(1), nickle(5), dime(10), quarter(25) }
 
 var a <- Coin.nickle;
 assert(a == Coin.nickle);
 assert(a.value == 5);

Or an enum can be complex with a user-defined set of fields, like

-
enum CarModel(make: string, mass: f32, wheelbase: f32) {  // enum with multiple fields
+
enum CarModel(make: string, mass: f32, wheelbase: f32) {  // enum with multiple fields
    gt          ( "ford",  1581, 2.71018 ),
    c8_corvette ( "chevy", 1527, 2.72288 )
 }

A field can also have a function type. For example

-
enum CarModel(make: string, mass: f32, wheelbase: f32, gasUsage: fn<f32 | f32>) {
+
enum CarModel(make: string, mass: f32, wheelbase: f32, gasUsage: fn<f32 | f32>) {
    gt          ( "ford",  1581, 2.71018, { miles_traveled -> miles_traveled / 14 } ),
    c8_corvette ( "chevy", 1527, 2.72288, { miles_traveled -> miles_traveled / 19 } )
 }
@@ -467,7 +467,7 @@ assert(a.value == 5);
var my_car <- CarModel.c8_corvette; var gas_used <- my_car.gasUsage(200); // estimate how much gas I'd use on a 200 mile trip

Equivalence of enums is not influenced by case values, e.g.

-
enum OneOrAnother: u16 { one(0), another(0) }
+
enum OneOrAnother: u16 { one(0), another(0) }
 
 con a <- OneOrAnother.one;
 con b <- OneOrAnother.another;
@@ -482,7 +482,7 @@ only value types are allowed for enum fields.

keyword. Fields are defined in the record block and behavior is defined in the optional impl block.

For example,

-
record Something {
+
record Something {
     label: i32    // field label followed by some type
 } impl { . . . }  // associated functions. This is different than having functions in the fields section because impl functions are not assignable.

If the record implements some interface, SomeInterface, @@ -492,7 +492,7 @@ the impl would be replaced with functions of the Something record.

Unions

Unions are the classic discriminated sum type.

-
union BinaryTree {
+
union BinaryTree {
     Empty,
     Leaf: i32,
     Node: (BinaryTree BinaryTree),
@@ -502,7 +502,7 @@ functions of the Something record.

section.

Type aliasing is provided with the type keyword, e.g.

-
type TokenStream Sequence<Token>
+
type TokenStream Sequence<Token>
 type Ast Tree<AbstractNode>
 
 fn parse(ts: TokenStream): Ast { . . . }
@@ -518,7 +518,7 @@ Types #, &, and @. These are immutable reference, mutable reference, and dereference, respectively. Some examples of referencing/dereferencing values:

-
var a <- "core dumped";
+
var a <- "core dumped";
 var b <- &a;                                       // b is a mutable reference to a
                                                  
 assert(a == @b);                                  
@@ -539,7 +539,7 @@ assert(@@c == a);
 references.

The reference operators may be prepended to any type, T, to describe the type of a reference to a value of type T, e.g.

-
fn set_through(ref: &string) {  // this function takes a mutable reference to a string and returns `()`
+
fn set_through(ref: &string) {  // this function takes a mutable reference to a string and returns `()`
     @ref <- "goodbye";
 }
 
diff --git a/acl.cool/syntax_wrapper.sh b/acl.cool/syntax_wrapper.sh
new file mode 120000
index 0000000..2af984f
--- /dev/null
+++ b/acl.cool/syntax_wrapper.sh
@@ -0,0 +1 @@
+../syntax_wrapper.sh
\ No newline at end of file
diff --git a/build.sh b/build.sh
index 6f54f9c..1f6e04b 100755
--- a/build.sh
+++ b/build.sh
@@ -21,23 +21,8 @@ source ./pgvv/bin/activate
 find acl.cool/site/ ytheleus.org/site/ -type f \( -name '*.dj' -o -name '*.html' \) -exec cat {} + >all_chars.txt
 cat common_chars.txt >>all_chars.txt
 
-for font in fonts/LiterataTT/LiterataTT-Subhead{Regular,Italic,Bold,BoldItalic}.woff2; do
-    woff2_decompress "$font"
-    ttf_font="${font%.woff2}.ttf"
-
-    subset_ttf="${ttf_font%.ttf}-Subset.ttf"
-    hb-subset "$ttf_font" \
-        --output-file="$subset_ttf" \
-        --text-file=all_chars.txt \
-        --layout-features='*' \
-        --passthrough-tables
-
-    woff2_compress "$subset_ttf"
-
-    rm "$subset_ttf" "$ttf_font"
-done
-
-for font in fonts/JuliaMono/*{-Light,-Regular,-SemiBold}{,Italic}.woff2; do
+for font in fonts/LiterataTT/LiterataTT-Subhead{Regular,Italic,Bold,BoldItalic}.woff2 \
+    fonts/JuliaMono/*{-Light,-Regular,-SemiBold}{,Italic}.woff2; do
     woff2_decompress "$font"
     ttf_font="${font%.woff2}.ttf"
 
diff --git a/soupault.toml b/soupault.toml
index 3f9537d..e3097be 100644
--- a/soupault.toml
+++ b/soupault.toml
@@ -139,4 +139,4 @@ delete_all = true
 [widgets.syntax]
 widget = "preprocess_element"
 selector = 'pre code'
-command = "pygmentize -l ${ATTR_CLASS##*-} -f html | head -c -13 | awk -F '
' '{print $NF}'"
+command = "./syntax_wrapper.sh ${ATTR_CLASS##*-}"
diff --git a/syntax_wrapper.sh b/syntax_wrapper.sh
new file mode 100755
index 0000000..3f1c4f7
--- /dev/null
+++ b/syntax_wrapper.sh
@@ -0,0 +1,29 @@
+#! /usr/bin/env bash
+#
+# Filter for soupault's preprocess_element widget (selector 'pre code'):
+# text arrives on stdin and the replacement is written to stdout.
+#
+# Usage: syntax_wrapper.sh [LEXER]
+#
+# If LEXER is absent, empty, or not a lexer alias known to Pygments, the
+# input is passed through unhighlighted instead of failing.
+
+# Succeed only if $1 is exactly one of the aliases listed by
+# `pygmentize -L lexers` (lines of the form "* alias1, alias2:").  Matching
+# whole aliases as fixed strings avoids false positives from words in the
+# lexer descriptions and from regex metacharacters in the class name.
+lexer_known() {
+    pygmentize -L lexers |
+        sed -n 's/^\* \(.*\):$/\1/p' |
+        tr -s ', ' '\n\n' |
+        grep -Fxq -- "$1"
+}
+
+if [[ $# -lt 1 || -z $1 ]] || ! lexer_known "$1"; then
+    printf ""
+    cat
+    printf ""
+else
+    # Quote the alias so it is passed as one literal argument.
+    pygmentize -l "$1" -f html | head -c -13 | awk -F '
' '{print $NF}'
+fi
\ No newline at end of file
diff --git a/ytheleus.org/syntax_wrapper.sh b/ytheleus.org/syntax_wrapper.sh
new file mode 120000
index 0000000..2af984f
--- /dev/null
+++ b/ytheleus.org/syntax_wrapper.sh
@@ -0,0 +1 @@
+../syntax_wrapper.sh
\ No newline at end of file