因此,您要从一组52个字符(A-Z
和a-z
)中生成700万个字符串,每个字符串包含2到4个字符。
一些简单的数学告诉我们,有2,704个可能的2个字符的字符串,140,608个3个字符的字符串和7,311,616个可能的4个字符的字符串。总共有7,454,928个可能的字符串。
因此,创建一个包含从0到7,927的所有数字的数组。 Shuffle it,然后从阵列中选择前700万。
当然,您必须编写代码将该数字转换为输出字符串之一。那很容易。 0是“ AA”,51是“ Az”,52是“ BA”。您基本上是在进行整数到字符串的转换。
这是基本概念。您必须转换为Java。
arraySize = 7454928
numbers = new array[arraySize]
// populate array
for i = 0 to arraySize-1
numbers[i] = i
shuffle(numbers)
for i = 0 to 7000000
code = convertNumberToString(numbers[i])
print code
convertNumberToString(n)
Alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
outputString = ""
while (n > 0 || outputString.length < 2)
result = n / 52 // length of alphabet
remainder = n % 52;
outputString.append(Alphabet[remainder])
n = result;
// outputString now contains the characters,// but they're reversed. So reverse the string.
return outputString.reverse()
,
要生成n个随机字符串,我们至少需要O(n)个步骤。要检查是否已经生成新的字符串,您需要一个好的搜索算法。因此,搜索可能需要O(n ^ 2)或O(nlogn)或O(logn)等,这取决于您使用的算法。如何通过为随机生成的字符串添加索引(例如1,2,3 ... 7,000,000)前缀来完全消除搜索步骤?如果您不喜欢十进制前缀,则可以使用十六进制前缀,等等。
希望这可以有所改善。
添加了C#示例程序。
using System;
using System.Collections.Generic;
namespace Test
{
class Program
{
public static List<char> GeneratePrintableCharArray(int numFirstChars = 256)
// Go thru numFirstChars first Unicode characters and extract
// non-control and non-white space characters into a list.
// From the default first 256 unicode chars,we can have 189 characters.
{
List<Char> printableChars = new List<char>();
for (int i = char.MinValue; i < numFirstChars && i <= char.MaxValue; i++)
{
char c = Convert.ToChar(i);
if (!char.IsControl(c) && !char.IsWhiteSpace(c))
{
printableChars.Add(c);
}
}
return printableChars;
}
static void Main(string[] args)
{
// If we want to use 2 "digits" to store the index upto 7 million,// we need 2645-base number system
// extract the first 2645 Uniocde characters and the null character
List<char> chars = GeneratePrintableCharArray(2713);
chars.Insert(0,'\0');
List<string> idxList = new List<string>();
List<string> randomList = new List<string>();
int maxNumStrings = 7000000;
bool stop = false;
// 1. Generate "2-digit" index string list and up-to 2 character string list
for (int i = 0; i < chars.Count && !stop; i++)
{
for (int j = 0; j < chars.Count && !stop; j++)
{
string s = "";
if (i > 0 && j > 0)
{
s += new string(chars[i],1);
s += new string(chars[j],1);
idxList.Add(s);
}
else if (i > 0)
{
s += new string(chars[i],1);
}
else if (j > 0)
{
s += new string(chars[j],1);
}
randomList.Add(s);
if (idxList.Count == maxNumStrings)
{
stop = true;
}
}
}
// 2. Append random suffix for the index
Random rnd = new Random();
for (int i = 0; i < idxList.Count; i++)
{
idxList[i] += randomList[rnd.Next(0,randomList.Count - 1)];
}
// Here,we have 7 mil random strings.
// 3. Just for testing - to make sure we don't have multiple items with the same string
HashSet<string> set = new HashSet<string>();
for (int i = 0; i < idxList.Count; i++)
{
if (set.Contains(idxList[i]))
{
Console.WriteLine("There is a bug. Duplicate string found: " + idxList[i]);
}
else
{
set.Add(idxList[i]);
}
}
}
}
}
本文链接:https://www.f2er.com/3134309.html