#! /usr/bin/env crystal
#
# This script generates the file spec/std/string/grapheme_break_spec.cr
# that contains test cases for Unicode grapheme clusters based on the default
# Grapheme_Cluster_Break Test
# http://www.unicode.org/Public/x.y.z/ucd/auxiliary/GraphemeBreakTest.txt

require "http/client"
require "../src/compiler/crystal/formatter"

UCD_ROOT = "http://www.unicode.org/Public/#{Unicode::VERSION}/ucd/"

url = "#{UCD_ROOT}auxiliary/GraphemeBreakTest.txt"

path = "#{__DIR__}/../spec/std/string/grapheme_break_spec.cr"

# Returns the sole `Char` of *string* when it is exactly one character long,
# otherwise returns *string* unchanged.
def string_or_char(string)
  if string.size == 1
    string[0]
  else
    string
  end
end

# Download the test data *before* opening the output file: opening with "w"
# truncates the existing spec, so a failed download must not get that far.
response = HTTP::Client.get(url)
unless response.success?
  abort "Failed to download #{url}: HTTP #{response.status_code}"
end

File.open(path, "w") do |file|
  file.puts <<-CRYSTAL
    # This file was automatically generated by running:
    #
    #   scripts/generate_grapheme_break_spec.cr
    #
    # See https://www.unicode.org/license.html for the Unicode license agreement.
    # DO NOT EDIT

    require "./spec_helper"

    describe "String#each_grapheme" do
    CRYSTAL

  response.body.each_line do |line|
    # Skip comment-only lines; blank lines carry no test case and would
    # otherwise produce a bogus `it_iterates_graphemes "", [""]` entry.
    next if line.blank? || line.starts_with?('#')

    # Everything before the first '#' is the test data, the rest is a
    # trailing comment that is copied into the generated spec.
    format, _, comment = line.partition('#')

    # TODO: implement grapheme boundary rule GB9c in UAX29
    pending = comment.includes?("[9.3]")

    graphemes = [] of String | Char

    # `string` accumulates every code point of the line, while `grapheme`
    # accumulates the code points of the cluster currently being built.
    string = String.build do |io|
      grapheme = String::Builder.new
      # Tokens are consumed as (operator, codepoint) pairs.
      format.split.in_groups_of(2) do |ary|
        operator, codepoint = ary
        # An operator without a following code point ends the data.
        break if codepoint.nil?
        char = codepoint.to_i(16).chr
        io << char
        case operator
        when "÷"
          # Flush the cluster built so far (if any) and start a new one.
          unless grapheme.empty?
            graphemes << string_or_char(grapheme.to_s)
          end
          grapheme = String::Builder.new
        when "×"
          # The code point is appended to the current cluster below.
        else raise "Unexpected operator #{operator.inspect}"
        end
        grapheme << char
      end
      graphemes << string_or_char(grapheme.to_s)
    end

    # Lines flagged as pending are wrapped in a `pending "GB9c" { ... }` block.
    file.puts " #{%(pending "GB9c" { ) if pending} it_iterates_graphemes #{string.dump}, [#{graphemes.join(", ", &.dump)}] #{" }" if pending} # #{comment}"
  end
  file.puts "end"
end

# Pass the path as a separate argument instead of interpolating it into a
# shell command line, so directories containing spaces or shell
# metacharacters are handled, and report a formatter failure explicitly.
status = Process.run(
  "crystal",
  ["tool", "format", path],
  output: Process::Redirect::Inherit,
  error: Process::Redirect::Inherit
)
abort "crystal tool format failed for #{path}" unless status.success?