From 2976f5ee4fed771d7eed8e4dfc790aa970840a9e Mon Sep 17 00:00:00 2001 From: Marvin Borner Date: Sun, 27 May 2018 15:51:53 +0200 Subject: Began javascript implementation of asemica (perl) encryption --- .../sprinkles/core/assets/SiteAssets/js/asemica.js | 89 +++ .../sprinkles/core/assets/SiteAssets/js/asemica.pl | 617 +++++++++++++++++++++ .../sprinkles/core/templates/pages/test.html.twig | 6 +- 3 files changed, 711 insertions(+), 1 deletion(-) create mode 100644 main/app/sprinkles/core/assets/SiteAssets/js/asemica.js create mode 100644 main/app/sprinkles/core/assets/SiteAssets/js/asemica.pl diff --git a/main/app/sprinkles/core/assets/SiteAssets/js/asemica.js b/main/app/sprinkles/core/assets/SiteAssets/js/asemica.js new file mode 100644 index 0000000..3f4de45 --- /dev/null +++ b/main/app/sprinkles/core/assets/SiteAssets/js/asemica.js @@ -0,0 +1,89 @@ +function asemica(PlainText, CorpusUrl) { + let CorpusString, Tokens, Transitions; + + fetch(CorpusUrl) + .then(function (response) { + response.text().then(function (response) { + CorpusString = response; + Tokens = tokenize_corpus(CorpusString); + Transitions = generate_transitions(Tokens); + console.log(Transitions); + }); + }); + + /* + * Breaks the input corpus into a series of processable "tokens" + * + * Example output: ['The','Project','Gutenberg', ... ,'about','new','eBooks'] + */ + function tokenize_corpus(CorpusString) { + // Clean up things + const StrippedCorpus = CorpusString + .replace(/\n/g, " ") // newlines + .replace(/<\/?[^>]+(>|$)/g, "") // html + .replace(/[^\w']/g, " ") // non-word characters + .replace(/[0-9]/g, " ") // numbers + .replace(/\s\s+/g, " ") // sequences of spaces + .replace(/^\s+/, "") // leading whitespace + .replace(/\s+$/, ""); // trailing whitespace + + Tokens = StrippedCorpus.split(/\s/); + + return Tokens; + } + + /* + * Creates the primary transition matrix + * + * Example output: + * + * 'atlantic' => { // Lowercase form + * 'seen' => 2, // How many times seen in corpus + * 'exits' => { // Which words follow it? + * 'City' => 1, // One instance of this + * 'and' => 1 // One instance of that + * }, // Exits not guaranteed unique + * 'door' => [ // Doors are guaranteed unique + * 'City', // Following door number 1 + * 'and' // Following door number 2 + * ], + * 'doors' => 2, // Cached count of doors + * 'token' => 'Atlantic' // Original form of the token + * } + * ... + */ + function generate_transitions(Tokens) { + Transitions = {}; + + Tokens.forEach(function(Token, Index) { + var ObjectKey = Token.toLowerCase(); + if (Transitions[ObjectKey] === undefined) { // Will run one time -> initialize + Transitions[ObjectKey] = {}; + Transitions[ObjectKey]["seen"] = 1; + Transitions[ObjectKey]["exits"] = {}; + Transitions[ObjectKey]["door"] = []; + if (Tokens[Index + 1] !== undefined) { + Transitions[ObjectKey]["exits"][Tokens[Index + 1]] += 1; + } + Transitions[ObjectKey]["token"] = ObjectKey; + } else { // Will run n times + var CurrentSeenValue = Transitions[ObjectKey]["seen"]; + Transitions[ObjectKey]["seen"] = CurrentSeenValue + 1; + } + }); + + Transitions.forEach(function(Transition) { + var Exits = Transitions[Transition]["exits"].sort(); + var Found = {}; + + Exits.forEach(function(Exit) { + + }); + + + }); + + return Transitions; + } + +} \ No newline at end of file diff --git a/main/app/sprinkles/core/assets/SiteAssets/js/asemica.pl b/main/app/sprinkles/core/assets/SiteAssets/js/asemica.pl new file mode 100644 index 0000000..c321935 --- /dev/null +++ b/main/app/sprinkles/core/assets/SiteAssets/js/asemica.pl @@ -0,0 +1,617 @@ +#!/usr/bin/perl -w +use strict; +use warnings; + + +### +# Asemica -- An asemic Markov-chained cipher +# Copyright (c) 2011 by Danne Stayskal +### + +### +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +### + +our $VERSION = '1.0'; + +use Getopt::Long; + +my $corpus_file = ''; +my $input_file = ''; +my $output_file = ''; +my $verbose = 0; +my $format = ''; +my $force = 0; +my $get_help = !scalar(@ARGV); +GetOptions( + "c|corpus=s" => \$corpus_file, + "i|input=s" => \$input_file, + "o|output=s" => \$output_file, + "v|verbose+" => \$verbose, + "f|format=s" => \$format, + "force+" => \$force, + "h|help+" => \$get_help, +); +my $operation = shift @ARGV; + +if ($verbose) { + print STDERR "Asemica version $VERSION running\n"; + print STDERR " Corpus file: $corpus_file\n" if $corpus_file; + print STDERR " Input file: $input_file\n" if $input_file; + print STDERR " Output file: $output_file\n" if $output_file; + print STDERR " Operation: $operation\n" if $operation; + print STDERR " Force: $force\n" if $force; + print STDERR " Help: $get_help\n" if $get_help; + print STDERR "\n"; +} + +my $usage = "Usage: $0 (enc|dec) -c [-i ] " . + "[-o ] [-f ] [--force] [--help]\n"; + +### +# If all they want is help (or they ran this with no args), give them help +### +if ($get_help) { + print STDERR <<__END_HELP__ +Asemica: an asemic Markov-chained cipher, v. $VERSION +$usage +OPTIONS: + -c/--corpus: specify corpus filename or URL + -i/--input: specify input filename (defaults to STDIN) + -o/--output: specify output filename (defaults to STDOUT) + -f/--format: specify output format (defaults to none) + --force: forces runtime on an insufficiently complex corpus + --help: displays this message + -v/--verbost: increments verbosity setting (used for debugging) +AVAILABLE FORMATS: + none: doesn't format output; returns only word list + email: formats output to look like an informal email + poem: if you want your output to look like poetry +EXAMPLES + echo "message" | $0 enc -c corpus.txt -o asemic.txt + $0 dec -c corpus.txt -i asemic.txt +__END_HELP__ + ; + exit 1; +} + + +### +# Make sure we have necessary and sufficient inputs (command line or STDIN) +### + +unless ($operation) { + print "No operation (encode or decode) specified.\n"; + print STDERR $usage; + exit 1; +} + +unless ($operation eq 'enc' || $operation eq 'dec') { + print "Invalid operation specified: $operation\n"; + print STDERR $usage; + exit 1; +} + +my $input = ''; +unless ($input_file) { + if ($verbose) { + print STDERR "No input file specified. Reading from STDIN...\n"; + } + $input = join('', ); +} +else { + open('INPUT', '<', $input_file) || die "Can't read $input_file"; + $input = join('', ); + close('INPUT'); +} + + +### +# Load and verify corpus +### + +my $corpus = ''; +unless ($corpus_file) { + print STDERR "No corpus specified. Can't operate without a corpus.\n"; + print STDERR $usage; + exit 1; +} +if (-f $corpus_file) { + ### It's a flat file. Load and move on. + open('CORPUS', '<', $corpus_file) || die "Can't read $corpus_file"; + $corpus = join('', ); + close('CORPUS'); + +} +elsif ($corpus_file =~ m/^https?/) { + ### It's a URI. Try to download it. + $corpus = `curl -s $corpus_file`; + + unless ($corpus) { + print STDERR "Unable to load corpus from $corpus_file\nExiting.\n"; + exit 1; + } + +} +else { + print STDERR "Couldn't read corpus at $corpus_file\nExiting.\n"; + exit 1; +} + + +### +# Calculate and verify corpus tokens and transition matrix +### +my $tokens = tokenize_corpus($corpus); +my $transitions = generate_transitions(@$tokens); +unless (verify_exits($transitions)) { + if ($force) { + if ($verbose) { + print STDERR "Insufficient number of nodes with sufficient exits " . + "to perform quality coding using the specified corpus file." . + "Proceeding due to use of --force.\n"; + } + } + else { + print STDERR "Insufficient number of nodes with sufficient exits to " . + "perform quality coding using the specified corpus file. " . + "Use --force to override (which would likely generate an " . + "absurdly long output text).\n" . + "Exiting.\n"; + exit 1; + } +} +else { + if ($verbose) { + print STDERR "Sufficient number of nodes with sufficient exits to " . + "perform quality coding using the specified corpus file.\n"; + } +} + + +### +# Run the actual encoding or decoding procedure +### +my $output_text = ''; +if ($operation eq 'enc') { + + $output_text = encode($input, $transitions, $tokens); + + if ($format) { + $output_text = format_text($output_text, $format); + } + +} +elsif ($operation eq 'dec') { + + $output_text = decode($input, $transitions, $tokens); + +} + + +### +# Output the results +### +if ($output_file) { + open('OUTPUT', '>', $output_file) || die "Can't write to $output_file.\n"; + print OUTPUT $output_text; + close('OUTPUT'); +} +else { + print $output_text; +} + + +### +# clean_input +# Removes all nonwords and HTML from input text +# +# Takes: +# - $input, a scalar containing the input to be cleaned +# Returns: +# - $output, a scalar containing the cleaned data +# Note: +# Yes, this is a silly thing to have isolated like this, but I'm doing so +# because the cleaning procedures need to be identical for encoding and +# decoding to work properly. This approach saves time and repetition. +### +sub clean_input { + my ($input) = @_; + + $input =~ s/\n/ /g; ### Change newlines to spaces + $input =~ s/\<[^>]*//g; ### Strip out HTML (poorly -- we can't assume any + ### modules other than perl's core will be around) + $input =~ s/[^\w\']/ /g; ### Change non-word characters to spaces + $input =~ s/\d/ /g; ### Change numbers to spaces + $input =~ s/\s+/ /g; ### Change sequences of spaces to a single space + $input =~ s/^\s+//; ### Trim leading whitespace + $input =~ s/\s+$//; ### Trim trailing whitespace + + return $input; +} + + +### +# tokenize_corpus +# Breaks the input corpus into a series of processable "tokens" +# +# Takes: +# - $corpus, a scalar containing the complete input corpus +# Returns: +# - $tokens, an array reference of tokens (most likely "words") present +# Output looks like: +# ['The','Project','Gutenberg', ... ,'about','new','eBooks'] +### +sub tokenize_corpus { + my ($corpus) = @_; + + $corpus = clean_input($corpus); + my @tokens = split(/\s/, $corpus); + + return \@tokens; +} + + +### +# generate_transitions +# Creates the primary transition matrix for use in coding +# +# Takes: +# - @tokens, an array of tokens present, sequentially, in the corpus +# Returns: +# - $transitions, the transition matrix +# Output looks like: +# { +# 'atlantic' => { ### Lowercase form +# 'seen' => 2, ### How many fimes seen in corpus +# 'exits' => { ### Which words follow it? +# 'City' => 1, ### One instance of this +# 'and' => 1 ### One instance of that +# }, ### Exits not guaranteed unique +# 'door' => [ ### Doors are guaranteed unique +# 'City', ### Following door number 1 +# 'and' ### Following door number 2 +# ], +# 'doors' => 2, ### Cached count of doors +# 'token' => 'Atlantic' ### Original form of the token +# }, +# ... +# } +### +sub generate_transitions { + my @tokens = @_; + my $transitions = {}; + + ### Generate the initial transitions table + foreach my $index (0 .. scalar(@tokens)) { + my $token = $tokens[$index - 1]; + my $key = lc($token); + + $transitions->{$key}->{seen}++; + $transitions->{$key}->{token} = $token; + + if ($tokens[$index + 1]) { + $transitions->{$key}->{exits}->{$tokens[$index + 1]}++; + } + } + + ### Calculate the exits and doors + foreach my $transition (keys(%$transitions)) { + my @exits = keys(%{$transitions->{$transition}->{exits}}); + $transitions->{$transition}->{door} = []; + my $found = {}; + foreach my $exit (sort (keys(%{$transitions->{$transition}->{exits}}))) { + unless ($found->{lc($exit)}) { + push @{$transitions->{$transition}->{door}}, $exit; + } + $found->{lc($exit)} = 1; + } + $transitions->{$transition}->{doors} = scalar( + @{$transitions->{$transition}->{door}} + ); + if ($transitions->{$transition}->{doors} > 15) { + $transitions->{$transition}->{meaningful} = 1; + } + } + + return $transitions; +} + + +### +# verify_exits +# Returns whether this corpus will work well as an encoding or decoding medium +# +# Takes: +# - $transitions, the calculated transition matrix +# Returns: +# - 1 if it will suffice, 0 if it probably won't. +# Note: +# We are looking for how many nodes in the transition matrix have more than +# 15 doors (as they'll need minimally 16 in order for the relationship +# between any node and its successor to be able to encode a binary nibble) +# If there are fewer than 10 of these, chances are the encoding / decoding +# is going to be of very low quality. +### +sub verify_exits { + my ($transitions) = @_; + my $count = 0; + my @meaningful = (); + foreach my $key (keys(%$transitions)) { + if ($transitions->{$key}->{doors} > 15) { + $count++; + push @meaningful, $key; + } + } + if ($verbose) { + print STDERR "$count meaningful transitions (" . + join(', ', @meaningful) . ")\n\n"; + } + if ($count >= 7) { + return 1; + } + else { + return; + } +} + + +### +# encode +# Encodes an input file using the transition matrix calculated from the corpus +# Takes: +# - $input, a scalar containing the input to be encoded +# - $transitions, the transition matrix calculated from the corpus +# - $tokens, an array reference of the token sequence from the key corpus +# Returns: +# - $encoded_text, the encoded text +### +sub encode { + my ($input, $transitions, $tokens) = @_; + + my $bits = unpack("b*", $input); + my $nibbles; + while (my $nibble = substr($bits, 0, 4, '')) { + push @$nibbles, bin2dec($nibble); + } + + my $token = $tokens->[int(rand(scalar(@$tokens)))]; + my $encoded_text = ''; + my $last_token = $token; ### for debugging + + while (scalar(@$nibbles)) { + + if ($transitions->{lc($token)}->{meaningful}) { + $encoded_text .= $token . ' '; + + ### This token means something. Walk through the nibblth door. + my $nibble = shift(@$nibbles); + $token = $transitions->{lc($token)}->{door}->[$nibble]; + + } + else { + $encoded_text .= $token . ' '; + + ### This token is irrelevant. Stumble drunkenly through any door. + $token = $transitions->{lc($token)}->{door}->[ + int(rand($transitions->{lc($token)}->{doors})) + ]; + } + + unless ($token) { + use Data::Dumper; + print "DEBUG:\ntoken = $token\n" . + "Transitions:" . Dumper($transitions->{lc($token)}) . + "\nlast_token = $last_token\n" . + "Transitions:" . Dumper($transitions->{lc($last_token)}); + exit 1; + } + + $last_token = $token; + } + $encoded_text .= $token . ' '; + + return $encoded_text; +} + + +### +# decode +# Pieces an arbitrary binary sequence back together from ASCII input file +# +# Takes: +# - $output_text, text originally output by an encoding pass of this script +# - $transitions, the transition matrix generated from the key corpus +# - $tokens, an array reference of the token sequence from the key corpus +# Returns: +# - $reconstituted, the decoded text +### +sub decode { + my ($input, $transitions, $tokens) = @_; + + my $decoded = ''; + + $input = clean_input($input); + + my @words = split(/\s+/, $input); + + foreach my $i (0 .. scalar(@words) - 2) { + if ($transitions->{lc($words[$i])}->{meaningful}) { + ### We walked through a specific door. Figure out which it was. + my $num_doors = scalar(@{$transitions->{lc($words[$i])}->{door}}); + foreach my $j (0 .. $num_doors - 1) { + my $on_door = lc($transitions->{lc($words[$i])}->{door}->[$j]); + if ($on_door eq lc($words[$i + 1])) { + my $binary = dec2bin($j); + while (length($binary) < 4) { + $binary = '0' . $binary; + } + $decoded .= $binary; + } + } + } + ### TODO: a later version of this should use the less meaningful + ### nodes to encode information as well. Really, any node with more + ### than one exit can be used to encode something (minimally, a single + ### bit), so in that sense any node with more than 2 exits /could/ be + ### a door. We'd just have to modify the coders for variable lengths. + ### For right now, we're just using nodes with 16 or more exits (so + ### they can encode minimally a nibble), then only making use of the + ### first 16 doors from that node. This can be improved with variable- + ### length encoding. + } + + my $decoded_text = pack('b*', $decoded); + return $decoded_text; +} + + +### +# format_text +# Formats the output text to look like something human-created +# +# Takes: +# - $input_text, a scalar containing the text to be formatted +# - $format, a scalar specifying the desired format +# Returns: +# - $output_text, a scalar containing the formatted text +# Supported formats: none, essay, poem, scripture, email +### +sub format_text { + my ($input_text, $format) = @_; + + my $formats = { + 'none' => sub {return join(' ', @_);}, + + 'essay' => sub { + my @words = @_; + return join(' ', @words); + }, + + 'textile' => sub { + my @words = @_; + return join(' ', @words); + }, + + ### + # Poem format + ### + 'poem' => sub { + my @words = @_; + + ### Form the words into sentences + my @puncts = ('.', ' ', ' ', ' ', ',', ',', ',', '!', '?'); + my @sentences = (); + while (scalar(@words)) { + my $sentence_length = 6 + int(rand(3)); + if (scalar(@words) < $sentence_length) { + $sentence_length = scalar(@words); + } + my $sentence = join(' ', splice(@words, 0, $sentence_length, ())); + $sentence = ucfirst($sentence) . $puncts[int(rand(@puncts))]; + push @sentences, $sentence; + } + + ### Form the sentences into stanzas + my @stanzas = (); + while (scalar(@sentences)) { + my $stanza_length = 4 + int(rand(7)); + if (scalar(@sentences) < $stanza_length) { + $stanza_length = scalar(@sentences); + } + my $stanza = join("\n", splice(@sentences, 0, $stanza_length, ())); + push @stanzas, $stanza; + } + return join("\n\n", @stanzas); + }, + + ### + # Email format + ### + 'email' => sub { + my @words = @_; + + my $greeting = ucfirst(shift(@words)); + my $name = ucfirst(pop(@words)); + my $thanks = pop(@words); + + ### Form the words into sentences + my @puncts = qw/? . . . . . !/; + my @sentences = (); + while (scalar(@words)) { + my $sentence_length = 7 + int(rand(10)); + if (scalar(@words) < $sentence_length) { + $sentence_length = scalar(@words); + } + my $sentence = join(' ', splice(@words, 0, $sentence_length, ())); + $sentence = ucfirst($sentence) . $puncts[int(rand(@puncts))]; + push @sentences, $sentence; + } + + ### Form the sentences into paragraphs + my @paragraphs = (); + while (scalar(@sentences)) { + my $paragraph_length = 4 + int(rand(7)); + if (scalar(@sentences) < $paragraph_length) { + $paragraph_length = scalar(@sentences); + } + my $paragraph = join(' ', splice(@sentences, 0, $paragraph_length, ())); + push @paragraphs, $paragraph; + } + my $body = join("\n\n ", @paragraphs); + return "$greeting,\n\n $body\n\n$thanks,\n$name"; + }, + }; + if ($formats->{$format}) { + return $formats->{$format}->(split(' ', $input_text)); + } + else { + print STDERR "Unsupported format: $format\nExiting\n"; + exit 1; + } +} + + +### +# dec2bin +# Converts a decimal numeric expression to binary +# +# Takes: +# - $decimal, a decimal expression of a number (e.g. '54') +# Returns: +# - $binary, a binary expression of the same number (e.g. '110110') +# Note: +# Sourced from Perl Cookbook (Christiansen & Torkington 1998), sec. 2.4 +### +sub dec2bin { + my $str = unpack("B32", pack("N", shift)); + $str =~ s/^0+(?=\d)//; + return $str; +} + + +### +# bin2dec +# Converts a binary numeric expression to decimal +# +# Takes: +# - $binary, a binary expression of the same number (e.g. '110110') +# Returns: +# - $decimal, a decimal expression of a number (e.g. '54') +# Note: +# Sourced from Perl Cookbook (Christiansen & Torkington 1998), sec. 2.4 +### +sub bin2dec { + return unpack("N", pack("B32", substr("0" x 32 . shift, -32))); +} diff --git a/main/app/sprinkles/core/templates/pages/test.html.twig b/main/app/sprinkles/core/templates/pages/test.html.twig index bcd1b26..7d84c09 100644 --- a/main/app/sprinkles/core/templates/pages/test.html.twig +++ b/main/app/sprinkles/core/templates/pages/test.html.twig @@ -1,3 +1,6 @@ + + +{#
{% include "forms/csrf.html.twig" %}

@@ -34,4 +37,5 @@ //window.location.reload(); } }); - \ No newline at end of file + +#} \ No newline at end of file -- cgit v1.2.3