Began javascript implementation of asemica (perl) encryption

author: Marvin Borner 2018-05-27 15:51:53 +0200
committer: Marvin Borner 2018-05-27 15:51:53 +0200
commit: 2976f5ee4fed771d7eed8e4dfc790aa970840a9e (patch)
tree: c189626035c7a4721c53a96f8319557ea7a8bbb4
parent: 205cbe2fbb3e29eb5930b307250b90a3d09a433d (diff)
3 files changed, 711 insertions, 1 deletions
diff --git a/main/app/sprinkles/core/assets/SiteAssets/js/asemica.js b/main/app/sprinkles/core/assets/SiteAssets/js/asemica.js
new file mode 100644
index 0000000..3f4de45
--- /dev/null
+++ b/main/app/sprinkles/core/assets/SiteAssets/js/asemica.js
@@ -0,0 +1,89 @@
+function asemica(PlainText, CorpusUrl) {
+    // PlainText is not encoded yet: so far only the transition matrix is built
+    return fetch(CorpusUrl)
+        .then(function (response) {
+            if (!response.ok) { // fetch() does not reject on HTTP error codes
+                throw new Error("Unable to load corpus from " + CorpusUrl);
+            }
+            return response.text();
+        })
+        .then(function (CorpusString) {
+            const Transitions = generate_transitions(tokenize_corpus(CorpusString));
+            console.log(Transitions);
+            return Transitions;
+        });
+
+    /*
+    * Breaks the input corpus into a series of processable "tokens"
+    *
+    * Example output: ['The','Project','Gutenberg', ... ,'about','new','eBooks']
+    */
+    function tokenize_corpus(CorpusString) {
+        // Clean up things
+        const StrippedCorpus = CorpusString
+            .replace(/\n/g, " ") // newlines
+            .replace(/<\/?[^>]+(>|$)/g, "") // html
+            .replace(/[^\w']/g, " ") // non-word characters
+            .replace(/[0-9]/g, " ") // numbers
+            .replace(/\s\s+/g, " ") // sequences of spaces
+            .replace(/^\s+/, "") // leading whitespace
+            .replace(/\s+$/, ""); // trailing whitespace
+
+        // "".split() yields [""], so an empty corpus must map to no tokens
+        return StrippedCorpus === "" ? [] : StrippedCorpus.split(/\s/);
+    }
+
+    /*
+    * Creates the primary transition matrix (exits are the immediate followers).
+    * NOTE(review): asemica.pl pairs token i-1 with token i+1 -- confirm intent.
+    * Example output:
+    *
+    * 'atlantic' => {                       // Lowercase form
+    *           'seen' => 2,                // How many times seen in corpus
+    *           'exits' => {                // Which words follow it?
+    *                         'City' => 1,  // One instance of this
+    *                         'and' => 1    // One instance of that
+    *                      },               // Exits not guaranteed unique
+    *           'door' => [                 // Doors are guaranteed unique
+    *                        'City',        // Following door number 1
+    *                        'and'          // Following door number 2
+    *                     ],
+    *           'doors' => 2,               // Cached count of doors
+    *           'token' => 'Atlantic'       // Original form of the token
+    * }
+    * ...
+    */
+    function generate_transitions(Tokens) {
+        const Transitions = Object.create(null); // no inherited keys such as "constructor"
+        // Count every occurrence and every follower, not only the first one
+        Tokens.forEach(function (Token, Index) {
+            const ObjectKey = Token.toLowerCase();
+            if (Transitions[ObjectKey] === undefined) {
+                Transitions[ObjectKey] = {seen: 0, exits: Object.create(null), door: []};
+            }
+            const Transition = Transitions[ObjectKey];
+            Transition.seen += 1;
+            Transition.token = Token;
+            const NextToken = Tokens[Index + 1];
+            if (NextToken !== undefined) {
+                Transition.exits[NextToken] = (Transition.exits[NextToken] || 0) + 1;
+            }
+        });
+
+        // Doors are the sorted exits, unique when compared case-insensitively
+        Object.keys(Transitions).forEach(function (ObjectKey) {
+            const Transition = Transitions[ObjectKey];
+            const Found = Object.create(null);
+            Object.keys(Transition.exits).sort().forEach(function (Exit) {
+                if (!Found[Exit.toLowerCase()]) {
+                    Transition.door.push(Exit);
+                }
+                Found[Exit.toLowerCase()] = true;
+            });
+            Transition.doors = Transition.door.length;
+            Transition.meaningful = Transition.doors > 15; // 16+ doors encode a nibble
+        });
+
+        return Transitions;
+    }
+}
+\ No newline at end of file
diff --git a/main/app/sprinkles/core/assets/SiteAssets/js/asemica.pl b/main/app/sprinkles/core/assets/SiteAssets/js/asemica.pl
new file mode 100644
index 0000000..c321935
--- /dev/null
+++ b/main/app/sprinkles/core/assets/SiteAssets/js/asemica.pl
@@ -0,0 +1,617 @@
+#!/usr/bin/perl -w
+use strict;
+use warnings;
+
+
+###
+# Asemica -- An asemic Markov-chained cipher
+# Copyright (c) 2011 by Danne Stayskal <danne@stayskal.com>
+###
+
+###
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+###
+
+our $VERSION = '1.0';
+
+use Getopt::Long;
+
+my $corpus_file = '';
+my $input_file = '';
+my $output_file = '';
+my $verbose = 0;
+my $format = '';
+my $force = 0;
+my $get_help = !scalar(@ARGV);
+GetOptions(
+    "c|corpus=s" => \$corpus_file,
+    "i|input=s"  => \$input_file,
+    "o|output=s" => \$output_file,
+    "v|verbose+" => \$verbose,
+    "f|format=s" => \$format,
+    "force+"     => \$force,
+    "h|help+"    => \$get_help,
+);
+my $operation = shift @ARGV;
+
+if ($verbose) {
+    print STDERR "Asemica version $VERSION running\n";
+    print STDERR "   Corpus file: $corpus_file\n" if $corpus_file;
+    print STDERR "   Input file: $input_file\n" if $input_file;
+    print STDERR "   Output file: $output_file\n" if $output_file;
+    print STDERR "   Operation: $operation\n" if $operation;
+    print STDERR "   Force: $force\n" if $force;
+    print STDERR "   Help: $get_help\n" if $get_help;
+    print STDERR "\n";
+}
+
+my $usage = "Usage: $0 (enc|dec) -c <corpus_file> [-i <input_file>] " .
+    "[-o <output_file>] [-f <format>] [--force] [--help]\n";
+
+###
+# If all they want is help (or they ran this with no args), print it and stop
+###
+if ($get_help) {
+    print STDERR <<__END_HELP__
+Asemica: an asemic Markov-chained cipher, v. $VERSION
+$usage
+OPTIONS:
+   -c/--corpus:  specify corpus filename or URL
+   -i/--input:   specify input filename (defaults to STDIN)
+   -o/--output:  specify output filename (defaults to STDOUT)
+   -f/--format:  specify output format (defaults to none)
+   --force:      forces runtime on an insufficiently complex corpus
+   --help:       displays this message
+   -v/--verbose: increments verbosity setting (used for debugging)
+AVAILABLE FORMATS:
+   none:         doesn't format output; returns only word list
+   email:        formats output to look like an informal email
+   poem:         if you want your output to look like poetry
+EXAMPLES
+   echo "message" | $0 enc -c corpus.txt -o asemic.txt
+   $0 dec -c corpus.txt -i asemic.txt
+__END_HELP__
+    ;
+    exit 1;
+}
+
+
+###
+# Make sure we have necessary and sufficient inputs (command line or STDIN).
+# Diagnostics go to STDERR so they can never end up in the coded output.
+###
+unless ($operation) {
+    print STDERR "No operation (encode or decode) specified.\n";
+    print STDERR $usage;
+    exit 1;
+}
+
+unless ($operation eq 'enc' || $operation eq 'dec') {
+    print STDERR "Invalid operation specified: $operation\n";
+    print STDERR $usage;
+    exit 1;
+}
+
+my $input = '';
+if ($input_file) {
+    ### Lexical handle; report the OS error so a failure can be diagnosed
+    open(my $input_fh, '<', $input_file) || die "Can't read $input_file: $!";
+    $input = join('', <$input_fh>);
+    close($input_fh);
+}
+else {
+    if ($verbose) {
+        print STDERR "No input file specified.  Reading from STDIN...\n";
+    }
+    $input = join('', <STDIN>);
+}
+
+
+###
+# Load and verify corpus (a local flat file, or an http(s) URL fetched by curl)
+###
+my $corpus = '';
+unless ($corpus_file) {
+    print STDERR "No corpus specified.  Can't operate without a corpus.\n";
+    print STDERR $usage;
+    exit 1;
+}
+if (-f $corpus_file) {
+    ### It's a flat file.  Load and move on.
+    open(my $corpus_fh, '<', $corpus_file) || die "Can't read $corpus_file: $!";
+    $corpus = join('', <$corpus_fh>);
+    close($corpus_fh);
+}
+elsif ($corpus_file =~ m/^https?/) {
+    ### It's a URI.  Download it with list-form open so that the user-supplied
+    ### string never passes through a shell (backticks allowed injection).
+    if (open(my $curl, '-|', 'curl', '-s', $corpus_file)) {
+        $corpus = join('', <$curl>);
+        close($curl);
+    }
+    unless ($corpus) {
+        print STDERR "Unable to load corpus from $corpus_file\nExiting.\n";
+        exit 1;
+    }
+}
+else {
+    print STDERR "Couldn't read corpus at $corpus_file\nExiting.\n";
+    exit 1;
+}
+
+
+###
+# Calculate and verify corpus tokens and transition matrix
+###
+my $tokens = tokenize_corpus($corpus);
+my $transitions = generate_transitions(@$tokens);
+unless (verify_exits($transitions)) {
+    if ($force) {
+        if ($verbose) {
+            print STDERR "Insufficient number of nodes with sufficient exits " .
+                "to perform quality coding using the specified corpus file." .
+                "Proceeding due to use of --force.\n";
+        }
+    }
+    else {
+        print STDERR "Insufficient number of nodes with sufficient exits to " .
+            "perform quality coding using the specified corpus file. " .
+            "Use --force to override (which would likely generate an " .
+            "absurdly long output text).\n" .
+            "Exiting.\n";
+        exit 1;
+    }
+}
+else {
+    if ($verbose) {
+        print STDERR "Sufficient number of nodes with sufficient exits to " .
+            "perform quality coding using the specified corpus file.\n";
+    }
+}
+
+
+###
+# Run the actual encoding or decoding  procedure
+###
+my $output_text = '';
+if ($operation eq 'enc') {
+
+    $output_text = encode($input, $transitions, $tokens);
+
+    if ($format) {
+        $output_text = format_text($output_text, $format);
+    }
+
+}
+elsif ($operation eq 'dec') {
+
+    $output_text = decode($input, $transitions, $tokens);
+
+}
+
+
+###
+# Output the results (to the requested file, otherwise to STDOUT)
+###
+if ($output_file) {
+    open(my $output_fh, '>', $output_file) || die "Can't write to $output_file: $!\n";
+    print {$output_fh} $output_text;
+    close($output_fh) || die "Can't write to $output_file: $!\n"; ### buffered errors surface here
+}
+else {
+    print $output_text;
+}
+
+
+###
+# clean_input
+# Removes all nonwords and HTML from input text
+#
+# Takes:
+#   - $input, a scalar containing the input to be cleaned
+# Returns:
+#   - $output, a scalar containing the cleaned data
+# Note:
+#   Yes, this is a silly thing to have isolated like this, but I'm doing so
+#   because the cleaning procedures need to be identical for encoding and
+#   decoding to work properly.  This approach saves time and repetition.
+###
+sub clean_input {
+    my ($text) = @_;
+
+    for ($text) {      ### Alias $_ to the copy; each step edits it in turn
+        s/\n/ /g;      ### Newlines become spaces
+        s/\<[^>]*//g;  ### Drop "<..." up to (not including) the next ">"
+        s/[^\w\']/ /g; ### Non-word characters become spaces
+        s/\d/ /g;      ### Digits become spaces
+        s/\s+/ /g;     ### Collapse whitespace runs to one space
+        s/^\s+//;      ### Trim leading whitespace
+        s/\s+$//;      ### Trim trailing whitespace
+    }
+    return $text;
+}
+
+
+###
+# tokenize_corpus
+# Breaks the input corpus into a series of processable "tokens"
+#
+# Takes:
+#   - $corpus, a scalar containing the complete input corpus
+# Returns:
+#   - $tokens, an array reference of tokens (most likely "words") present
+# Output looks like:
+#   ['The','Project','Gutenberg', ... ,'about','new','eBooks']
+###
+sub tokenize_corpus {
+    my ($corpus) = @_;
+
+    ### clean_input leaves single spaces only, so every split field is a
+    ### token; the anonymous array hands the caller its own fresh list.
+    my $cleaned = clean_input($corpus);
+    return [ split(/\s/, $cleaned) ];
+}
+
+
+###
+# generate_transitions
+# Creates the primary transition matrix for use in coding
+#
+# Takes:
+#   - @tokens, an array of tokens present, sequentially, in the corpus
+# Returns:
+#   - $transitions, the transition matrix
+# Output looks like:
+#    {
+#   	'atlantic' => {                      ### Lowercase form
+#                'seen' => 2,                ### How many times seen in corpus
+#                'exits' => {                ### Which words follow it?
+#                             'City' => 1,   ### One instance of this
+#                             'and' => 1     ### One instance of that
+#                           },               ### Exits not guaranteed unique
+#                'door' => [                 ### Doors are guaranteed unique
+#                            'City',         ### Following door number 1
+#                            'and'           ### Following door number 2
+#                          ],
+#                'doors' => 2,               ### Cached count of doors
+#                'token' => 'Atlantic'       ### Original form of the token
+#              },
+#    ...
+#    }
+###
+sub generate_transitions {
+    my @tokens = @_;
+    my $transitions = {};
+
+    ### An empty corpus has no nodes (avoids a phantom '' node and warnings)
+    return $transitions unless @tokens;
+
+    ### Generate the initial transitions table.
+    ### NOTE(review): index 0 revisits the last token ($tokens[-1]) and each
+    ### exit is taken from $index + 1, i.e. two places after the token. This
+    ### is kept as is: changing it would change the door numbering, so texts
+    ### coded with the current matrix could no longer be decoded.
+    foreach my $index (0 .. scalar(@tokens)) {
+        my $token = $tokens[$index - 1];
+        my $key = lc($token);
+        my $node = ($transitions->{$key} ||= {});
+        $node->{seen}++;
+        $node->{token} = $token;
+        if ($tokens[$index + 1]) {
+            $node->{exits}->{$tokens[$index + 1]}++;
+        }
+    }
+
+    ### Calculate the doors: sorted exits, unique regardless of letter case
+    foreach my $node (values(%$transitions)) {
+        my %found;
+        $node->{door} = [];
+        foreach my $exit (sort(keys(%{$node->{exits}}))) {
+            push(@{$node->{door}}, $exit) unless $found{lc($exit)}++;
+        }
+        $node->{doors} = scalar(@{$node->{door}});
+
+        ### More than 15 doors means one step can carry a 4-bit nibble
+        $node->{meaningful} = 1 if $node->{doors} > 15;
+    }
+
+    return $transitions;
+}
+
+
+###
+# verify_exits
+# Returns whether this corpus will work well as an encoding or decoding medium
+#
+# Takes:
+#   - $transitions, the calculated transition matrix
+# Returns:
+#   - 1 if it will suffice, nothing (a bare return) if it probably won't.
+# Note:
+#   We are looking for how many nodes in the transition matrix have more than
+#   15 doors (as they'll need minimally 16 in order for the relationship
+#   between any node and its successor to be able to encode a binary nibble)
+#   If there are fewer than 7 of these, chances are the encoding / decoding
+#   is going to be of very low quality.
+###
+sub verify_exits {
+    my ($transitions) = @_;
+
+    ### Keep the nodes with more than 15 doors (16 are needed for a nibble)
+    my @meaningful = grep {
+        $transitions->{$_}->{doors} > 15
+    } keys(%$transitions);
+    my $count = scalar(@meaningful);
+
+    if ($verbose) {
+        print STDERR "$count meaningful transitions (" .
+            join(', ', @meaningful) . ")\n\n";
+    }
+
+    ### Seven such nodes are enough; with fewer, the bare return reports
+    ### failure (undef in scalar context, empty list in list context)
+    if ($count < 7) {
+        return;
+    }
+    return 1;
+}
+
+
+###
+# encode
+# Encodes an input file using the transition matrix calculated from the corpus
+# Takes:
+#   - $input, a scalar containing the input to be encoded
+#   - $transitions, the transition matrix calculated from the corpus
+#   - $tokens, an array reference of the token sequence from the key corpus
+# Returns:
+#   - $encoded_text, the encoded text
+###
+sub encode {
+    my ($input, $transitions, $tokens) = @_;
+
+    ### A walk needs somewhere to start (and to restart from, see below)
+    die "Can't encode without any corpus tokens\n" unless @$tokens;
+
+    ### Split the input into 4-bit nibbles, lowest bit of each byte first.
+    ### @nibbles is a real array so that an empty input simply yields no
+    ### nibbles (dereferencing an undefined reference is fatal under strict).
+    my $bits = unpack("b*", $input);
+    my @nibbles = ();
+    while (my $nibble = substr($bits, 0, 4, '')) {
+        push @nibbles, bin2dec($nibble);
+    }
+
+    ### Start the walk anywhere in the corpus
+    my $token = $tokens->[int(rand(scalar(@$tokens)))];
+    my $encoded_text = '';
+
+    while (scalar(@nibbles)) {
+        ### Every visited token is written out, whether it carries data or not
+        my $node = $transitions->{lc($token)};
+        $encoded_text .= $token . ' ';
+
+        if ($node->{meaningful}) {
+            ### This token means something.  Walk through the nibblth door
+            ### (a meaningful node has at least 16, so 0..15 always exist).
+            $token = $node->{door}->[shift(@nibbles)];
+        }
+        elsif ($node->{doors}) {
+            ### This token is irrelevant.  Stumble drunkenly through any door.
+            $token = $node->{door}->[int(rand($node->{doors}))];
+        }
+        else {
+            ### Dead end: this node has no doors at all.  Jump to a random
+            ### token instead of aborting; decode() only reads the step out
+            ### of a meaningful node, so the jump carries no data and cannot
+            ### corrupt the message.
+            $token = $tokens->[int(rand(scalar(@$tokens)))];
+        }
+    }
+
+    ### The final token closes the last step, so it has to be written too
+    $encoded_text .= $token . ' ';
+
+    return $encoded_text;
+}
+
+
+###
+# decode
+# Pieces an arbitrary binary sequence back together from ASCII input file
+#
+# Takes:
+#   - $output_text, text originally output by an encoding pass of this script
+#   - $transitions, the transition matrix generated from the key corpus
+#   - $tokens, an array reference of the token sequence from the key corpus
+# Returns:
+#   - $reconstituted, the decoded text
+###
+sub decode {
+    my ($input, $transitions, $tokens) = @_;    ### $tokens is not used here
+
+    ### Clean the text the same way the corpus was cleaned, which also
+    ### removes punctuation and line breaks added by format_text
+    my @words = split(/\s+/, clean_input($input));
+    my $decoded = '';
+
+    foreach my $i (0 .. scalar(@words) - 2) {
+        my $node = $transitions->{lc($words[$i])};
+        next unless $node->{meaningful};
+
+        ### We walked through a specific door.  Figure out which it was.
+        ### encode() only ever uses doors 0..15, and a door number above 15
+        ### would append more than four bits and shift every later nibble,
+        ### so only the first 16 doors are searched.
+        my $next_word = lc($words[$i + 1]);
+        foreach my $j (0 .. 15) {
+            if (lc($node->{door}->[$j]) eq $next_word) {
+                $decoded .= sprintf('%04b', $j);
+                last;    ### doors are unique, so nothing else can match
+            }
+        }
+
+        ### TODO: a later version of this should use the less meaningful
+        ### nodes to encode information as well.  Really, any node with more
+        ### than one exit can be used to encode something (minimally, a single
+        ### bit), so in that sense any node with more than 2 exits /could/ be
+        ### a door.  We'd just have to modify the coders for variable lengths.
+        ### For right now, we're just using nodes with 16 or more exits (so
+        ### they can encode minimally a nibble), then only making use of the
+        ### first 16 doors from that node. This can be improved with variable-
+        ### length encoding.
+    }
+
+    my $decoded_text = pack('b*', $decoded);
+    return $decoded_text;
+}
+
+
+###
+# format_text
+# Formats the output text to look like something human-created
+# (an unsupported $format prints an error to STDERR and exits with status 1)
+# Takes:
+#   - $input_text, a scalar containing the text to be formatted
+#   - $format, a scalar specifying the desired format
+# Returns:
+#   - $output_text, a scalar containing the formatted text
+# Supported formats: none, essay, textile, poem, email
+###
+sub format_text {
+    my ($input_text, $format) = @_;
+
+    my $formats = {
+        'none'    => sub {return join(' ', @_);},
+
+        'essay'   => sub { ### for now this only re-joins the words
+            my @words = @_;
+            return join(' ', @words);
+        },
+
+        'textile' => sub { ### for now this only re-joins the words
+            my @words = @_;
+            return join(' ', @words);
+        },
+
+        ###
+        # Poem format: lines of 6-8 words, grouped into stanzas of 4-10 lines
+        ###
+        'poem'    => sub {
+            my @words = @_;
+
+            ### Form the words into sentences
+            my @puncts = ('.', ' ', ' ', ' ', ',', ',', ',', '!', '?'); ### repeats are picked more often
+            my @sentences = ();
+            while (scalar(@words)) {
+                my $sentence_length = 6 + int(rand(3)); ### 6 to 8 words
+                if (scalar(@words) < $sentence_length) {
+                    $sentence_length = scalar(@words); ### last line takes what is left
+                }
+                my $sentence = join(' ', splice(@words, 0, $sentence_length, ()));
+                $sentence = ucfirst($sentence) . $puncts[int(rand(@puncts))];
+                push @sentences, $sentence;
+            }
+
+            ### Form the sentences into stanzas
+            my @stanzas = ();
+            while (scalar(@sentences)) {
+                my $stanza_length = 4 + int(rand(7)); ### 4 to 10 lines
+                if (scalar(@sentences) < $stanza_length) {
+                    $stanza_length = scalar(@sentences);
+                }
+                my $stanza = join("\n", splice(@sentences, 0, $stanza_length, ()));
+                push @stanzas, $stanza;
+            }
+            return join("\n\n", @stanzas);
+        },
+
+        ###
+        # Email format: first word is the greeting, last two the sign-off
+        ###
+        'email'   => sub {
+            my @words = @_;
+
+            my $greeting = ucfirst(shift(@words)); ### first word
+            my $name = ucfirst(pop(@words));       ### last word
+            my $thanks = pop(@words);              ### word before the last
+
+            ### Form the words into sentences
+            my @puncts = qw/? . . . . . !/; ### mostly full stops
+            my @sentences = ();
+            while (scalar(@words)) {
+                my $sentence_length = 7 + int(rand(10)); ### 7 to 16 words
+                if (scalar(@words) < $sentence_length) {
+                    $sentence_length = scalar(@words);
+                }
+                my $sentence = join(' ', splice(@words, 0, $sentence_length, ()));
+                $sentence = ucfirst($sentence) . $puncts[int(rand(@puncts))];
+                push @sentences, $sentence;
+            }
+
+            ### Form the sentences into paragraphs
+            my @paragraphs = ();
+            while (scalar(@sentences)) {
+                my $paragraph_length = 4 + int(rand(7)); ### 4 to 10 sentences
+                if (scalar(@sentences) < $paragraph_length) {
+                    $paragraph_length = scalar(@sentences);
+                }
+                my $paragraph = join('  ', splice(@sentences, 0, $paragraph_length, ()));
+                push @paragraphs, $paragraph;
+            }
+            my $body = join("\n\n   ", @paragraphs);
+            return "$greeting,\n\n   $body\n\n$thanks,\n$name";
+        },
+    };
+    if ($formats->{$format}) {
+        return $formats->{$format}->(split(' ', $input_text)); ### formatter gets the word list
+    }
+    else {
+        print STDERR "Unsupported format: $format\nExiting\n";
+        exit 1;
+    }
+}
+
+
+###
+# dec2bin
+# Converts a decimal numeric expression to binary
+#
+# Takes:
+#   - $decimal, a decimal expression of a number (e.g. '54')
+# Returns:
+#   - $binary, a binary expression of the same number (e.g. '110110')
+# Note:
+#   Sourced from Perl Cookbook (Christiansen & Torkington 1998), sec. 2.4
+###
+sub dec2bin {
+    my ($decimal) = @_;
+    (my $binary = unpack("B32", pack("N", $decimal))) =~ s/^0+(?=\d)//;
+    return $binary;    ### leading zeros are gone, but "0" stays "0"
+}
+
+
+###
+# bin2dec
+# Converts a binary numeric expression to decimal
+# Takes:
+#   - $binary, up to 32 binary digits as a string (e.g. '110110')
+# Returns:
+#   - $decimal, the decimal value of that number (e.g. 54)
+# Note:
+#   Sourced from Perl Cookbook (Christiansen & Torkington 1998), sec. 2.4
+###
+sub bin2dec {
+    my $padded = ("0" x 32) . $_[0];    ### left-pad so 32 digits always exist
+    return unpack("N", pack("B32", substr($padded, -32)));
+}
diff --git a/main/app/sprinkles/core/templates/pages/test.html.twig b/main/app/sprinkles/core/templates/pages/test.html.twig
index bcd1b26..7d84c09 100644
--- a/main/app/sprinkles/core/templates/pages/test.html.twig
+++ b/main/app/sprinkles/core/templates/pages/test.html.twig
@@ -1,3 +1,6 @@
+
+<script src="{{ assets.url('assets://SiteAssets/js/asemica.js') }}"></script>
+{#
 <form method="post" action="{{ site.uri.public }}/api/posts/image">
     {% include "forms/csrf.html.twig" %}
     <p>
@@ -34,4 +37,5 @@
             //window.location.reload();
         }
     });
-</script>
-\ No newline at end of file
+</script>
+#}
+\ No newline at end of file
author	Marvin Borner	2018-05-27 15:51:53 +0200
committer	Marvin Borner	2018-05-27 15:51:53 +0200
commit	2976f5ee4fed771d7eed8e4dfc790aa970840a9e (patch)
tree	c189626035c7a4721c53a96f8319557ea7a8bbb4
parent	205cbe2fbb3e29eb5930b307250b90a3d09a433d (diff)